Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Wed, 14 Mar 2012 15:27:39 -0300
From: Claudio André <claudioandre.br@...il.com>
To: john-dev@...ts.openwall.com
Subject: New function in common-opencl

Hi, i need a new function to check if LWS is ok in the running hardware 
(need it to optimize).

I put in common-opencl (seems the best place to be). Any complains?

Complete patch attached if someone wants to see it.
----

cl_ulong get_local_memory_size(int dev_id)
{
         cl_ulong size;
         HANDLE_CLERROR(clGetDeviceInfo(devices[dev_id], 
CL_DEVICE_LOCAL_MEM_SIZE,
                 sizeof(cl_ulong), &size, 0),
                 "Error querying CL_DEVICE_LOCAL_MEM_SIZE");

         return size;
}


Claudio

>>From 843f9a9d43b921ac9bc539703e765cdb2da967b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Claudio=20Andr=C3=A9?= <claudio@...udioandre-desktop.(none)>
Date: Wed, 14 Mar 2012 14:58:42 -0300
Subject: [PATCH] Improvement: get the LWS and KPC from john's config file.
 Improvement: check if group size is valid. Cosmetic:
 changes in benchmark comment. See commit: 9d3fe1414e.
 Performance: put all working variables in __local memory
 address space. Fixed: format and algorithm names. Fixed:
 copyright information.

---
 run/john.conf                    |    4 +
 src/common-opencl.c              |   10 ++
 src/opencl/cryptsha512_kernel.cl |  184 ++++++++++++++++++++++++--------------
 src/opencl_cryptsha512.h         |   28 +++++-
 src/opencl_cryptsha512_fmt.c     |  113 +++++++++++++++--------
 5 files changed, 230 insertions(+), 109 deletions(-)

diff --git a/run/john.conf b/run/john.conf
index 0304c5b..3f341eb 100644
--- a/run/john.conf
+++ b/run/john.conf
@@ -81,6 +81,10 @@ Device = 0
 #ssha_LWS = 512
 #ssha_KPC = 8192
 
+# For Crypt sha-512.
+#cryptsha512_LWS = 64
+#cryptsha512_KPC = 8192
+
 
 # A user defined character class is named with a single digit, ie. 0..9. After
 # the equal-sign, just list all characters that this class should match. You
diff --git a/src/common-opencl.c b/src/common-opencl.c
index 96dfec2..77a50f8 100644
--- a/src/common-opencl.c
+++ b/src/common-opencl.c
@@ -127,6 +127,16 @@ void opencl_init(char *kernel_filename, unsigned int dev_id,
 	build_kernel(dev_id);
 }
 
+cl_ulong get_local_memory_size(int dev_id)
+{    
+        cl_ulong size;        
+        HANDLE_CLERROR(clGetDeviceInfo(devices[dev_id], CL_DEVICE_LOCAL_MEM_SIZE,
+                sizeof(cl_ulong), &size, 0),
+                "Error querying CL_DEVICE_LOCAL_MEM_SIZE");    
+        
+        return size;
+}
+
 char *get_error_name(cl_int cl_error)
 {
 	static char *err_1[] =
diff --git a/src/opencl/cryptsha512_kernel.cl b/src/opencl/cryptsha512_kernel.cl
index 95e1f12..11f27dc 100644
--- a/src/opencl/cryptsha512_kernel.cl
+++ b/src/opencl/cryptsha512_kernel.cl
@@ -1,11 +1,17 @@
 /*
-* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
-* and it is hereby released to the general public under the following terms:
-* Redistribution and use in source and binary forms, with or without modification, are permitted.
-*/
+ * Developed by Claudio André <claudio.andre at correios.net.br> in 2012   
+ * Based on source code provided by Lukas Odzioba
+ *
+ * This software is:
+ * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+ * Copyright (c) 2012 Claudio André <claudio.andre at correios.net.br>
+ * and it is hereby released to the general public under the following terms:
+ * Redistribution and use in source and binary forms, with or without modification, are permitted.
+ * 
+ * This program comes with ABSOLUTELY NO WARRANTY; express or implied .
+ */
 
 #include "opencl_cryptsha512.h"
-//#pragma OPENCL EXTENSION cl_amd_printf : enable
 
 __constant uint64_t k[] = {
     0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
@@ -30,7 +36,7 @@ __constant uint64_t k[] = {
     0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
 };
 
-void init_ctx(sha512_ctx * ctx) {
+void init_ctx(__local sha512_ctx * ctx) {
     ctx->H[0] = 0x6a09e667f3bcc908UL;
     ctx->H[1] = 0xbb67ae8584caa73bUL;
     ctx->H[2] = 0x3c6ef372fe94f82bUL;
@@ -43,24 +49,27 @@ void init_ctx(sha512_ctx * ctx) {
     ctx->buflen = 0;
 }
 
-void memcpy_1(uint8_t * dest, const uint8_t * src, const size_t n) {
+void memcpy_08(__local uint8_t * dest, __local const uint8_t * src, const size_t n) {
     for (int i = 0; i < n; i++)
         dest[i] = src[i];
 }
 
-void memcpy (uint8_t * dest, buffer_64 * src, const size_t n) {
+void memcpy_64(__local uint8_t * dest, __local buffer_64 * src, const size_t n) {
     for (int i = 0; i < n; i++)
         dest[i] = src->mem_08[i];
 }
 
-void insert_to_buffer(sha512_ctx * ctx, const uint8_t * string,
+void insert_to_buffer(__local sha512_ctx * ctx, 
+                      __local const uint8_t * string,
                       const uint8_t len) {
-    uint8_t *d = ctx->buffer->mem_08 + ctx->buflen;  //Position ctx->buffer[buflen] (in char size)
-    memcpy_1(d, string, len);
+    __local uint8_t *d;
+    d = ctx->buffer->mem_08 + ctx->buflen;  //ctx->buffer[buflen] (in char size)
+
+    memcpy_08(d, string, len); //TODO: remove the call to other procedure??
     ctx->buflen += len;
 }
 
-void sha512_block(sha512_ctx * ctx) {
+void sha512_block(__local sha512_ctx * ctx) {
     int i;
     uint64_t a = ctx->H[0];
     uint64_t b = ctx->H[1];
@@ -73,7 +82,8 @@ void sha512_block(sha512_ctx * ctx) {
 
     uint64_t w[16];
 
-    uint64_t *data = ctx->buffer->mem_64;  //The same as buffer[0]
+    __local uint64_t *data = ctx->buffer->mem_64;  //The same as buffer[0]
+
     //#pragma unroll 16
     for (i = 0; i < 16; i++)
         w[i] = SWAP64(data[i]);
@@ -94,7 +104,7 @@ void sha512_block(sha512_ctx * ctx) {
         a = t1 + t2;
     }
 
-
+    //#pragma unroll 64
     for (i = 16; i < 80; i++) {
         w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
         t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
@@ -120,10 +130,10 @@ void sha512_block(sha512_ctx * ctx) {
     ctx->H[7] += h;
 }
 
-void ctx_append_1(sha512_ctx * ctx) {
+void ctx_append_1(__local sha512_ctx * ctx) {
     uint32_t length = ctx->buflen;
     int i = 127 - length;
-    uint8_t *d = ctx->buffer->mem_08 + length;
+    __local uint8_t *d = ctx->buffer->mem_08 + length;
     *d++ = 0x80;
 
     while (i--) {
@@ -143,18 +153,18 @@ void ctx_append_1(sha512_ctx * ctx) {
 */
 }
 
-void ctx_add_length(sha512_ctx * ctx) {
-    uint64_t *blocks = ctx->buffer->mem_64;
+void ctx_add_length(__local sha512_ctx * ctx) {
+    __local uint64_t *blocks = ctx->buffer->mem_64;
     blocks[15] = SWAP64((uint64_t) (ctx->total * 8));
 }
 
-void finish_ctx(sha512_ctx * ctx) {
+void finish_ctx(__local sha512_ctx * ctx) {
     ctx_append_1(ctx);
     ctx_add_length(ctx);
     ctx->buflen = 0;
 }
 
-void ctx_update(sha512_ctx * ctx, uint8_t *string, uint8_t len) {
+void ctx_update(__local sha512_ctx * ctx, __local uint8_t *string, uint8_t len) {
     ctx->total += len;
     uint8_t startpos = ctx->buflen;
     uint8_t partsize;
@@ -172,17 +182,17 @@ void ctx_update(sha512_ctx * ctx, uint8_t *string, uint8_t len) {
     }
 }
 
-void clear_ctx_buffer(sha512_ctx * ctx) {
+void clear_ctx_buffer(__local sha512_ctx * ctx) {
 
-    uint32_t *w = ctx->buffer->mem_32;
-    //#pragma unroll 30
-    for (int i = 0; i < 30; i++) //TODO: why 30? Not 32?
+    __local uint32_t *w = ctx->buffer->mem_32;
+    //#pragma unroll 32
+    for (int i = 0; i < 32; i++)
         w[i] = 0;
 
     ctx->buflen = 0;
 }
 
-void sha512_digest(sha512_ctx * ctx, uint64_t * result) {
+void sha512_digest(__local sha512_ctx * ctx, __local uint64_t * result) {
     uint8_t i;
     if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block
         finish_ctx(ctx);
@@ -205,27 +215,54 @@ void sha512_digest(sha512_ctx * ctx, uint64_t * result) {
         result[i] = SWAP64(ctx->H[i]);
 }
 
-void sha512crypt(uint8_t *pass, uint8_t passlength,
-                 crypt_sha512_salt cuda_salt, 
+void cmp_parallel(__local working_memory * tmp_working,
+                 __global crypt_sha512_hash * output){
+//Input: binary calculated hash (NEW)
+//Outpu: integet                (Change output size and type) 
+    if (output->v[0] == tmp_working->alt_result[0].mem_64[0] && 
+        output->v[1] == tmp_working->alt_result[1].mem_64[0] && 
+        output->v[2] == tmp_working->alt_result[2].mem_64[0] && 
+        output->v[3] == tmp_working->alt_result[3].mem_64[0] && 
+        output->v[4] == tmp_working->alt_result[4].mem_64[0] && 
+        output->v[5] == tmp_working->alt_result[5].mem_64[0] && 
+        output->v[6] == tmp_working->alt_result[6].mem_64[0] && 
+        output->v[7] == tmp_working->alt_result[7].mem_64[0]) {
+
+        //Write back the solution.
+        write_mem_fence(CLK_GLOBAL_MEM_FENCE);
+        output->v[7] = get_global_id(0);
+    }
+}
+
+void sha512crypt(__local working_memory * tmp_working,
                  __global crypt_sha512_hash * output) {
 
-    buffer_64 alt_result[8], temp_result[8];
-    int i;
-    sha512_ctx ctx;
+#define pass        tmp_working->pass_info.v
+#define passlength  tmp_working->pass_info.length
+#define salt        tmp_working->salt_info.salt
+#define saltlen     tmp_working->salt_info.saltlen
+#define rounds      tmp_working->salt_info.rounds
+#define alt_result  tmp_working->alt_result
+#define temp_result tmp_working->temp_result
+#define s_sequence  tmp_working->s_sequence
+#define p_sequence  tmp_working->p_sequence
+#define ctx         tmp_working->ctx_info
+
+    //sha512_ctx ctx;
     init_ctx(&ctx);
 
     ctx_update(&ctx, pass, passlength);
-    ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen);
+    ctx_update(&ctx, salt, saltlen);
     ctx_update(&ctx, pass, passlength);
 
     sha512_digest(&ctx, alt_result->mem_64);
     init_ctx(&ctx);
 
     ctx_update(&ctx, pass, passlength);
-    ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen);
+    ctx_update(&ctx, salt, saltlen);
     ctx_update(&ctx, alt_result->mem_08, passlength);
 
-    for (i = passlength; i > 0; i >>= 1) {
+    for (int i = passlength; i > 0; i >>= 1) {
         if ((i & 1) != 0)
             ctx_update(&ctx, alt_result->mem_08, 64);
         else
@@ -234,33 +271,25 @@ void sha512crypt(uint8_t *pass, uint8_t passlength,
     sha512_digest(&ctx, alt_result->mem_64);
     init_ctx(&ctx);
 
-    for (i = 0; i < passlength; i++)
+    for (int i = 0; i < passlength; i++)
         ctx_update(&ctx, pass, passlength);
 
     sha512_digest(&ctx, temp_result->mem_64);
-
-    uint8_t sp_sequence[16 + 4];
-    uint8_t *p_sequence = sp_sequence;
-    memcpy(p_sequence, temp_result, passlength);
+    memcpy_64(p_sequence, temp_result, passlength);
 
     init_ctx(&ctx);
     
     /* For every character in the password add the entire password.  */
-    for (i = 0; i < 16 + (alt_result->mem_08)[0]; i++)  //Analyse, TÁ CERTO?###
-        ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen);
+    for (int i = 0; i < 16 + (alt_result->mem_08)[0]; i++)
+        ctx_update(&ctx, salt, saltlen);
 
     /* Finish the digest.  */
     sha512_digest(&ctx, temp_result->mem_64);
-
-    uint8_t saltlength = cuda_salt.saltlen;
-
-    uint8_t ss_sequence[16 + 4];
-    uint8_t *s_sequence = ss_sequence;
-    memcpy(s_sequence, temp_result, saltlength);
+    memcpy_64(s_sequence, temp_result, saltlen);
 
     /* Repeatedly run the collected hash value through SHA512 to
        burn CPU cycles.  */
-    for (i = 0; i < cuda_salt.rounds; i++) {
+    for (int i = 0; i < rounds; i++) {
         init_ctx(&ctx);
 
         if ((i & 1) != 0)
@@ -269,7 +298,7 @@ void sha512crypt(uint8_t *pass, uint8_t passlength,
             ctx_update(&ctx, alt_result->mem_08, 64);  
 
         if ((i % 3) != 0)
-            ctx_update(&ctx, s_sequence, saltlength);
+            ctx_update(&ctx, s_sequence, saltlen);
 
         if ((i % 7) != 0)
             ctx_update(&ctx, p_sequence, passlength);
@@ -283,33 +312,56 @@ void sha512crypt(uint8_t *pass, uint8_t passlength,
     }
     //Send results to the host.
     //#pragma unroll 8
-    for (i = 0; i < 8; i++)
-        output->v[i] = alt_result[i].mem_64[0];
+    for (int i = 0; i < 8; i++)
+        output->v[i] = alt_result[i].mem_64[0];  
 }
+#undef salt       
+#undef saltlen    
+#undef rounds   
 
 __kernel void kernel_crypt(__constant crypt_sha512_salt * hsalt,
                            __constant crypt_sha512_password * inbuffer,
-                           __global   crypt_sha512_hash * outbuffer) {
-
-    uint8_t pass[PLAINTEXT_LENGTH];
-    crypt_sha512_salt salt_data;
+                           __global   crypt_sha512_hash * outbuffer,
+                           __local    working_memory * tmp_memory) {
 
     //Get the task to be done
-    uint32_t idx = get_global_id(0);
+    uint32_t gid = get_global_id(0);
+    uint32_t lid = get_local_id(0);
 
-    //Use fast memory.
+    //Transfer data to faster memory
+    //Password information
+    tmp_memory[lid].pass_info.length = inbuffer[gid].length;
 
-    //Get password information, put in faster memory.
-    for (int i = 0; i < inbuffer[idx].length; i++)
-        pass[i] = inbuffer[idx].v[i]; 
-    
-    //Get salt information, put in faster memory.
-    salt_data.saltlen = hsalt->saltlen;
-    salt_data.rounds = hsalt->rounds;
+    for (int i = 0; i < tmp_memory[lid].pass_info.length; i++)
+        tmp_memory[lid].pass_info.v[i] = inbuffer[gid].v[i]; 
+ 
+    //Salt information. Maybe use __constant someday.
+    tmp_memory[lid].salt_info.saltlen = hsalt->saltlen;  //TODO: Tirar o salt da working e usar um nova variável.
+    tmp_memory[lid].salt_info.rounds = hsalt->rounds;
 
-    for (int i = 0; i < salt_data.saltlen; i++)
-	salt_data.salt[i] = hsalt->salt[i];
+    for (int i = 0; i < tmp_memory[lid].salt_info.saltlen; i++)
+	tmp_memory[lid].salt_info.salt[i] = hsalt->salt[i];
 
     //Do the job
-    sha512crypt(pass, inbuffer[idx].length, salt_data, &outbuffer[idx]);
+    sha512crypt(&tmp_memory[lid], &outbuffer[gid]);
 }
+
+/***
+*    To improve performance, it uses __local memory to keep working variables 
+* (password, temp buffers, etc). In SHA 512 it means about 350 bytes per 
+* "thread". It improves performance a lot, but, local memory is a scarce 
+* resource. 
+*    It means the max group size allowed in OpenCL SHA 512 is going to be 
+* 128 (hardware depended).
+*
+* Gain   Optimizations
+*  --    Basic version, private and global variables only.
+*        Transfer all the working variables to local memory.
+*        Move salt to constant memory space. Keep others in local (saves memory).
+*        Do the compare task in GPU
+*
+* Conclusions
+* - 
+* -
+* - 
+***/
\ No newline at end of file
diff --git a/src/opencl_cryptsha512.h b/src/opencl_cryptsha512.h
index f8f1852..ae0d954 100644
--- a/src/opencl_cryptsha512.h
+++ b/src/opencl_cryptsha512.h
@@ -1,12 +1,20 @@
 /*
-* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
-* and it is hereby released to the general public under the following terms:
-* Redistribution and use in source and binary forms, with or without modification, are permitted.
-*/
+ * Developed by Claudio André <claudio.andre at correios.net.br> in 2012   
+ * Based on source code provided by Lukas Odzioba
+ *
+ * This software is:
+ * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+ * Copyright (c) 2012 Claudio André <claudio.andre at correios.net.br>
+ * and it is hereby released to the general public under the following terms:
+ * Redistribution and use in source and binary forms, with or without modification, are permitted.
+ * 
+ * This program comes with ABSOLUTELY NO WARRANTY; express or implied .
+ */
+
 #ifndef _CRYPTSHA512_H 
 #define _CRYPTSHA512_H
 
-//Type names definition. ///TODO: move to a new file and share this new file where needed.
+//Type names definition. 
 #define uint8_t  unsigned char
 #define uint16_t unsigned short
 #define uint32_t unsigned int
@@ -76,4 +84,14 @@ typedef struct {
 	uint64_t v[8];		//512 bits
 } crypt_sha512_hash;
 
+typedef struct {
+        crypt_sha512_password  pass_info;
+        crypt_sha512_salt      salt_info;
+        sha512_ctx             ctx_info;
+        buffer_64              alt_result[8];
+        buffer_64              temp_result[8];
+        uint8_t                s_sequence[SALT_SIZE];
+        uint8_t                p_sequence[PLAINTEXT_LENGTH];
+} working_memory;
+
 #endif
\ No newline at end of file
diff --git a/src/opencl_cryptsha512_fmt.c b/src/opencl_cryptsha512_fmt.c
index dfaa949..977591d 100644
--- a/src/opencl_cryptsha512_fmt.c
+++ b/src/opencl_cryptsha512_fmt.c
@@ -1,8 +1,10 @@
 /*
- * Copyright (c) 2011 Samuele Giovanni Tonon
- * samu at linuxasylum dot net
- * This program comes with ABSOLUTELY NO WARRANTY; express or
- * implied .
+ * Developed by Claudio André <claudio.andre at correios.net.br> in 2012   
+ * Based on source code provided by Samuele Giovanni Tonon
+ *
+ * Copyright (c) 2011 Samuele Giovanni Tonon <samu at linuxasylum dot net>
+ * Copyright (c) 2012 Claudio André <claudio.andre at correios.net.br>
+ * This program comes with ABSOLUTELY NO WARRANTY; express or implied .
  * This is free software, and you are welcome to redistribute it
  * under certain conditions; as expressed here 
  * http://www.gnu.org/licenses/gpl-2.0.html
@@ -10,18 +12,15 @@
 
 #include <string.h>
 #include "common-opencl.h"  
+#include "config.h"
 #include "opencl_cryptsha512.h"
 
-#define FORMAT_LABEL			"cryptsha512-opencl"
-#define FORMAT_NAME			"crypt SHA-512 OpenCL"
+#define FORMAT_LABEL			"cryptsha512-opencl" 
+#define FORMAT_NAME			"crypt SHA-512"
+#define ALGORITHM_NAME			"OpenCL"
+#define SHA_TYPE                        "SHA512"
 
-#if ARCH_BITS >= 64
-#define ALGORITHM_NAME			"OpenSSL 64/" ARCH_BITS_STR
-#else
-#define ALGORITHM_NAME			"OpenSSL 32/" ARCH_BITS_STR
-#endif
-
-#define BENCHMARK_COMMENT		" rounds=5000"
+#define BENCHMARK_COMMENT		" (rounds=5000)"
 #define BENCHMARK_LENGTH		-1
 
 #define BINARY_SIZE                     (3+16+86)       ///TODO: Magic number?
@@ -29,6 +28,9 @@
 #define MIN_KEYS_PER_CRYPT		1024            
 #define MAX_KEYS_PER_CRYPT		KEYS_PER_CRYPT
 
+#define LWS_CONFIG			"cryptsha512_LWS"
+#define KPC_CONFIG			"cryptsha512_KPC"
+
 static crypt_sha512_password            *plaintext;     // plaintext ciphertexts
 static crypt_sha512_hash                *out_hashes;    // calculated hashes
 static crypt_sha512_salt                salt_data;
@@ -52,6 +54,11 @@ static struct fmt_tests tests[] = {
     {NULL}
 }; 
 
+/* ------- Helper functions ------- */
+uint get_max_work_group_size(){
+    return get_local_memory_size(gpu_id) / sizeof(working_memory);
+}
+
 /* ------- Create and destroy necessary objects ------- */
 static void create_clobj(int kpc) {           
     pinned_saved_keys = clCreateBuffer(context[gpu_id], 
@@ -78,7 +85,7 @@ static void create_clobj(int kpc) {
     salt_info = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, 
             sizeof(crypt_sha512_salt), NULL, &ret_code);
     HANDLE_CLERROR(ret_code, "Error creating data_info out argument");
-    
+     
     buffer_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
             sizeof(crypt_sha512_password) * kpc, NULL, &ret_code);
     HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys");
@@ -93,8 +100,11 @@ static void create_clobj(int kpc) {
     HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof (cl_mem),
             (void *) &buffer_in), "Error setting argument 1");
     HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof (cl_mem),
-            (void *) &buffer_out), "Error setting argument 2");
-    
+            (void *) &buffer_out), "Error setting argument 2");     
+    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3,   //Fast working memory.
+            sizeof (working_memory) * local_work_size,
+            NULL), "Error setting argument 3");   
+ 
     memset(plaintext, '\0', sizeof(crypt_sha512_password) * kpc);
     salt_data.saltlen = 0;
     salt_data.rounds = 0;
@@ -143,7 +153,12 @@ static char *get_key(int index) {
 /* ------- Try to find the best configuration ------- */
 /* --
   This function could be used to calculated the best num
-  of keys per crypt for the given format
+  for the workgroup
+  Work-items that make up a work-group (also referred to 
+  as the size of the work-group) 
+  LWS should never be a big number since every work-item
+  uses about 400 bytes of local memory. Local memory 
+  is usually 32 KB
 -- */
 static void find_best_workgroup(void) {
     cl_event myEvent;
@@ -170,7 +185,8 @@ static void find_best_workgroup(void) {
             sizeof (crypt_sha512_password) * KEYS_PER_CRYPT, plaintext, 0, NULL, NULL);
 
     // Find minimum time
-    for (my_work_group = 1; (int) my_work_group <= (int) max_group_size; my_work_group *= 2) {
+    for (my_work_group = 1; (int) my_work_group <= (int) get_max_work_group_size(); 
+         my_work_group *= 2) {
         ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel, 
                 1, NULL, &max_keys_per_crypt, &my_work_group, 0, NULL, &myEvent);
         clFinish(queue_prof);
@@ -191,7 +207,9 @@ static void find_best_workgroup(void) {
         }
     }
     printf("Optimal local work size %d\n", (int) local_work_size);
-    printf("(to avoid this test on next run do export LWS=%d)\n", (int) local_work_size);
+    printf("(to avoid this test on next run, put \""
+        LWS_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS
+        SUBSECTION_OPENCL "])\n", (int)local_work_size);    
     clReleaseCommandQueue(queue_prof);
 }
 
@@ -259,8 +277,9 @@ static void find_best_kpc(void) {
         clReleaseCommandQueue(queue_prof);
     }
     printf("Optimal keys per crypt %d\n", optimal_kpc);
-    printf("(to avoid this test on next run do \"export KPC=%d\")\n", optimal_kpc);
-
+    printf("to avoid this test on next run, put \""
+        KPC_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS
+        SUBSECTION_OPENCL "])\n", optimal_kpc);
     max_keys_per_crypt = optimal_kpc;
     release_clobj();
     create_clobj(optimal_kpc);
@@ -268,33 +287,51 @@ static void find_best_kpc(void) {
 
 /* ------- Initialization  ------- */
 static void init(struct fmt_main *pFmt) {
-    char *kpc;
+    char *tmp_value;
     opencl_init("$JOHN/cryptsha512_kernel.cl", gpu_id, platform_id);
 
     // create kernel to execute
     crypt_kernel = clCreateKernel(program[gpu_id], "kernel_crypt", &ret_code);
     HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?");
 
-    if (((kpc = getenv("LWS")) == NULL) || (atoi(kpc) == 0)) {
+    if ((tmp_value = cfg_get_param(SECTION_OPTIONS,
+                                   SUBSECTION_OPENCL, LWS_CONFIG)))
+        local_work_size = atoi(tmp_value);
+    
+    if ((tmp_value = getenv("LWS")))
+        local_work_size = atoi(tmp_value);
+
+    //Check if local_work_size is a valid number.
+    if (local_work_size > get_max_work_group_size()){
+        printf("Error: invalid local work size (LWS). Max value allowed is: %u\n" ,
+               get_max_work_group_size());
+        local_work_size = 0; //Force find a valid number.
+    }
+    
+    if (!local_work_size) {
+        local_work_size = get_max_work_group_size();
         create_clobj(KEYS_PER_CRYPT);
         find_best_workgroup();
         release_clobj();
-    } else {
-        local_work_size = atoi(kpc);
     }
-    if ((kpc = getenv("KPC")) == NULL) {
+
+    if ((tmp_value = cfg_get_param(SECTION_OPTIONS, 
+                                   SUBSECTION_OPENCL, KPC_CONFIG)))
+        max_keys_per_crypt = atoi(tmp_value);
+    else
+        max_keys_per_crypt = KEYS_PER_CRYPT;
+
+    if ((tmp_value = getenv("KPC")))
+        max_keys_per_crypt = atoi(tmp_value);
+    
+    if (max_keys_per_crypt)
+        create_clobj(max_keys_per_crypt);
+
+    else {
+        //user chose to die of boredom
         max_keys_per_crypt = KEYS_PER_CRYPT;
         create_clobj(KEYS_PER_CRYPT);
-    } else {
-        if (atoi(kpc) == 0) {
-            //user chose to die of boredom
-            max_keys_per_crypt = KEYS_PER_CRYPT;
-            create_clobj(KEYS_PER_CRYPT); 
-            find_best_kpc();
-        } else {
-            max_keys_per_crypt = atoi(kpc);
-            create_clobj(max_keys_per_crypt);
-        }
+        find_best_kpc();
     }
     printf("Local work size (LWS) %d, Keys per crypt (KPC) %Zd\n", 
             (int) local_work_size, max_keys_per_crypt);   
@@ -365,7 +402,7 @@ static void set_salt(void *salt) {
         }
         offset = endp - currentsalt;
     }
-    memcpy(salt_data.salt, currentsalt + offset, 16);
+    memcpy(salt_data.salt, currentsalt + offset, SALT_SIZE);
     salt_data.saltlen = strlen((char *) salt_data.salt);
 }
 
@@ -477,7 +514,7 @@ static void crypt_all(int count) {
 }
 
 /* ------- Binary Hash functions group ------- */
-static int binary_hash_0(void * binary) { return *(ARCH_WORD_32 *) binary & 0xF; }
+static int binary_hash_0(void * binary) { return *(ARCH_WORD_32 *) binary & 0xF; } 
 static int binary_hash_1(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFF; }
 static int binary_hash_2(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFF; }
 static int binary_hash_3(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFF; }
-- 
1.7.5.4


Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ