Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 04 Apr 2012 09:43:06 -0300
From: Claudio André <claudioandre.br@...il.com>
To: john-dev@...ts.openwall.com
Subject: Re: OpenCL runtime errors

Hi, since you sad there are some delay, the error might occur in "setup".

So, i put more error treatment in find_best_kpc and  find_best_workgroup 
routines. If you can try this patch and check the results, i will be 
grateful. I expect a "managed" error now.

Thanks.

PS: recently, i haven't read e-mails that are not (directly) sent to me. 
If someone sent a message and need an answer, please just ping me, or 
just wait a week or so.



Em 03-04-2012 21:17, Solar Designer escreveu:
> Lukas, magnum -
>
> Somehow four of the OpenCL formats don't work for me on this system
> where I previously had a mix of Nvidia and AMD stuff, which I've now
> temporarily tried to clean up to just Nvidia.  The errors are different.
>
> The rest of the OpenCL formats work fine - specifically, all of
> Samuele's fast hashes and magnum's new RAR with OpenCL pass the tests.
> So my setup is not 100% broken. ;-)  (Also, all CUDA stuff works.)
>
> user@...l:~/john/magnum-jumbo/src$ ../run/john -te -fo=cryptmd5-opencl
> OpenCL platform 0: NVIDIA CUDA, 1 device(s).
> Using device 0: GeForce GTX 570
> OpenCL error (CL_INVALID_VALUE) in file (common-opencl.c) at line (128) - (Error while getting build info)
> user@...l:~/john/magnum-jumbo/src$ ../run/john -te -fo=cryptsha512-opencl
> OpenCL platform 0: NVIDIA CUDA, 1 device(s).
> Using device 0: GeForce GTX 570
> Segmentation fault (core dumped)
> user@...l:~/john/magnum-jumbo/src$ ../run/john -te -fo=phpass-opencl
> OpenCL platform 0: NVIDIA CUDA, 1 device(s).
> Using device 0: GeForce GTX 570
> OpenCL error (CL_OUT_OF_RESOURCES) in file (opencl_phpass_fmt.c) at line (162) - (Run kernel)
> user@...l:~/john/magnum-jumbo/src$ ../run/john -te -fo=mscash2-opencl
> OpenCL platform 0: NVIDIA CUDA, 1 device(s).
> Using device 0: GeForce GTX 570
> Benchmarking: mscash2-OPENCL [PBKDF2_HMAC_SHA1]... SYNC FAILED
> Write Read FAILED
> FAILED (get_hash[0](8092))
>
> The segfault for cryptsha512-opencl occurs with a delay of 10 seconds or
> so.  All or some of this is probably something I'll need to deal with on
> my end (such as by reinstalling), yet I thought it might be helpful to
> have this recorded.
>
> Alexander

>From 22ed16f6ae244eca9a40a28611c293d41e9f0785 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Claudio=20Andr=C3=A9?= <claudio.andre@...reios.net.br>
Date: Wed, 4 Apr 2012 09:28:13 -0300
Subject: [PATCH] More error treatment in find_best_kpc and
 find_best_workgroup. Only transfer keys to GPU is necessary
 (magnum idea).

---
 src/opencl/cryptsha512_kernel.cl |   15 +++----
 src/opencl_cryptsha512.h         |    7 +--
 src/opencl_cryptsha512_fmt.c     |   79 ++++++++++++++++++++++++-------------
 3 files changed, 59 insertions(+), 42 deletions(-)

diff --git a/src/opencl/cryptsha512_kernel.cl b/src/opencl/cryptsha512_kernel.cl
index c8a0c86..c3e7ca9 100644
--- a/src/opencl/cryptsha512_kernel.cl
+++ b/src/opencl/cryptsha512_kernel.cl
@@ -49,12 +49,12 @@ void init_ctx(__local sha512_ctx * ctx) {
     ctx->buflen = 0;
 }
 
-void memcpy_08(__local uint8_t * dest, __local const uint8_t * src, const size_t n) {
+inline void memcpy_08(__local uint8_t * dest, __local const uint8_t * src, const size_t n) {
     for (int i = 0; i < n; i++)
         dest[i] = src[i];
 }
 
-void memcpy_64(__local uint8_t * dest, __local buffer_64 * src, const size_t n) {
+inline void memcpy_64(__local uint8_t * dest, __local buffer_64 * src, const size_t n) {
     for (int i = 0; i < n; i++)
         dest[i] = src->mem_08[i];
 }
@@ -70,7 +70,6 @@ void insert_to_buffer(__local sha512_ctx * ctx,
 }
 
 void sha512_block(__local sha512_ctx * ctx) {
-    int i;
     uint64_t a = ctx->H[0];
     uint64_t b = ctx->H[1];
     uint64_t c = ctx->H[2];
@@ -82,15 +81,13 @@ void sha512_block(__local sha512_ctx * ctx) {
 
     uint64_t w[16];
 
-    __local uint64_t *data = ctx->buffer->mem_64;  //The same as buffer[0]
-
     #pragma unroll 16
-    for (i = 0; i < 16; i++)
-        w[i] = SWAP64(data[i]);
+    for (int i = 0; i < 16; i++)
+        w[i] = SWAP64(ctx->buffer->mem_64[i]);
 
     uint64_t t1, t2;
     #pragma unroll 16
-    for (i = 0; i < 16; i++) {
+    for (int i = 0; i < 16; i++) {
         t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
         t2 = Maj(a, b, c) + Sigma0(a);
 
@@ -105,7 +102,7 @@ void sha512_block(__local sha512_ctx * ctx) {
     }
 
     #pragma unroll 64
-    for (i = 16; i < 80; i++) {
+    for (int i = 16; i < 80; i++) {
         w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
         t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
         t2 = Maj(a, b, c) + Sigma0(a);
diff --git a/src/opencl_cryptsha512.h b/src/opencl_cryptsha512.h
index c0b5f6a..e920c6e 100644
--- a/src/opencl_cryptsha512.h
+++ b/src/opencl_cryptsha512.h
@@ -37,8 +37,8 @@
 #define MIN_KEYS_PER_CRYPT	128
 #define MAX_KEYS_PER_CRYPT	2048*2048*128
 
-#define rol(x,n)                ((x << n) | (x >> (64-n)))
-#define ror(x,n)                ((x >> n) | (x << (64-n)))
+#define rol(x,n)                rotate(x,n) 
+#define ror(x,n)                rotate(x, (ulong) 64-n)
 #define Ch(x,y,z)               ((x & y) ^ ( (~x) & z))
 #define Maj(x,y,z)              ((x & y) ^ (x & z) ^ (y & z))
 #define Sigma0(x)               ((ror(x,28)) ^ (ror(x,34)) ^ (ror(x,39)))
@@ -46,9 +46,6 @@
 #define sigma0(x)               ((ror(x,1))  ^ (ror(x,8))  ^ (x>>7))
 #define sigma1(x)               ((ror(x,19)) ^ (ror(x,61)) ^ (x>>6))
 
-# define SWAP32(n) \
-    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
-
 # define SWAP64(n) \
   (((n) << 56)					\
    | (((n) & 0xff00) << 40)			\
diff --git a/src/opencl_cryptsha512_fmt.c b/src/opencl_cryptsha512_fmt.c
index 2f49260..a26c6ea 100644
--- a/src/opencl_cryptsha512_fmt.c
+++ b/src/opencl_cryptsha512_fmt.c
@@ -39,6 +39,7 @@ cl_command_queue queue_prof;
 cl_kernel crypt_kernel;
 
 static size_t max_keys_per_crypt; //TODO: move to common-opencl? local_work_size is there.
+static int new_keys;
 
 static struct fmt_tests tests[] = {
     {"$6$saltstring$svn8UoSVapNtMuq1ukKS4tPQd8iKwSMHWjl/O817G3uBnIFNjnQJuesI68u4OTLiBFdcbYEdFCoEOfaS35inz1", "Hello world!"},
@@ -151,7 +152,8 @@ static void release_clobj(void) {
 static void set_key(char *key, int index) {
     int len = strlen(key);
     plaintext[index].length = len;
-    memcpy(plaintext[index].v, key, len); 
+    memcpy(plaintext[index].v, key, len);
+    new_keys = 1;
 }
 
 static char *get_key(int index) {
@@ -182,6 +184,7 @@ static void find_best_workgroup(void) {
     max_group_size = get_max_work_group_size(gpu_id);
     queue_prof = clCreateCommandQueue(context[gpu_id], devices[gpu_id], 
             CL_QUEUE_PROFILING_ENABLE, &ret_code);
+    HANDLE_CLERROR(ret_code, "Failed in clCreateCommandQueue");    
     printf("Max Group Work Size %d ", (int) max_group_size);
     local_work_size = 1;
 
@@ -189,29 +192,33 @@ static void find_best_workgroup(void) {
     for (i = 0; i < get_task_max_size(); i++) {
         set_key("aaabaabaaa", i);
     }
-    clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_TRUE, 0,
-            sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL);
-    clEnqueueWriteBuffer(queue_prof, buffer_in, CL_TRUE, 0, 
+    HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_TRUE, 0,
+            sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL),
+            "Failed in clEnqueueWriteBuffer I");
+    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, buffer_in, CL_TRUE, 0, 
             sizeof (crypt_sha512_password) * get_task_max_size(), 
-            plaintext, 0, NULL, NULL);
+            plaintext, 0, NULL, NULL),
+            "Failed in clEnqueueWriteBuffer II");
 
     // Find minimum time
     for (my_work_group = 1; (int) my_work_group <= (int) get_task_max_work_group_size(); 
          my_work_group *= 2) {
         ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel, 
                 1, NULL, &max_keys_per_crypt, &my_work_group, 0, NULL, &myEvent);
-        clFinish(queue_prof);
+        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
 
         if (ret_code != CL_SUCCESS) {
             printf("Error %d\n", ret_code); ///Better commented by default.
             break;
         }
         //Get profile information
-        clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, 
-                sizeof (cl_ulong), &startTime, NULL);
-        clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, 
-                sizeof (cl_ulong), &endTime, NULL);
-        clReleaseEvent (myEvent);
+        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, 
+                sizeof (cl_ulong), &startTime, NULL),
+                "Failed in clGetEventProfilingInfo I");
+        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, 
+                sizeof (cl_ulong), &endTime, NULL),
+                "Failed in clGetEventProfilingInfo II");
+        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
         
         if ((endTime - startTime) < kernelExecTimeNs) {
             kernelExecTimeNs = endTime - startTime;
@@ -222,7 +229,8 @@ static void find_best_workgroup(void) {
     printf("(to avoid this test on next run, put \""
         LWS_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS
         SUBSECTION_OPENCL "])\n", (int)local_work_size);    
-    clReleaseCommandQueue(queue_prof);
+    HANDLE_CLERROR(clReleaseCommandQueue(queue_prof), 
+            "Failed in clReleaseCommandQueue");
 }
 
 /* --
@@ -246,33 +254,45 @@ static void find_best_kpc(void) {
         create_clobj(num);
         advance_cursor();
         tmpbuffer = malloc(sizeof (crypt_sha512_hash) * num);
+        
+        if (tmpbuffer == NULL) {
+            printf ("Failed in malloc inside find_best_kpc\n");
+            exit (EXIT_FAILURE);
+        }
+        
         queue_prof = clCreateCommandQueue(context[gpu_id], devices[gpu_id], 
                 CL_QUEUE_PROFILING_ENABLE, &ret_code);
+        HANDLE_CLERROR(ret_code, "Failed in clCreateCommandQueue");
 
         // Set keys
         for (i = 0; i < num; i++) {
             set_key("aaabaabaaa", i);
         }
-        clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0,
-                sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL);
-        clEnqueueWriteBuffer(queue_prof, buffer_in, CL_FALSE, 0, 
-                sizeof (crypt_sha512_password) * num, plaintext, 0, NULL, NULL); 
+        HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0,
+                sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL),
+                "Failed in clEnqueueWriteBuffer I");
+        HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, buffer_in, CL_FALSE, 0, 
+                sizeof (crypt_sha512_password) * num, plaintext, 0, NULL, NULL),
+                "Failed in clEnqueueWriteBuffer II");
         ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel, 
                 1, NULL, &num, &local_work_size, 0, NULL, &myEvent);
-        clEnqueueReadBuffer(queue_prof, buffer_out, CL_FALSE, 0,
-                sizeof (crypt_sha512_hash) * num, tmpbuffer, 0, NULL, NULL);
-        clFinish(queue_prof);
+        HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, buffer_out, CL_FALSE, 0,
+                sizeof (crypt_sha512_hash) * num, tmpbuffer, 0, NULL, NULL),
+                "Failed in clEnqueueReadBuffer");
+        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
             
         if (ret_code != CL_SUCCESS) {
             printf("Error %d\n", ret_code);
             continue;
         }       
-        clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, 
-                sizeof (cl_ulong), &startTime, NULL);
-        clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, 
-                sizeof (cl_ulong), &endTime, NULL);
+        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, 
+                sizeof (cl_ulong), &startTime, NULL),
+                "Failed in clGetEventProfilingInfo I");
+        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, 
+                sizeof (cl_ulong), &endTime, NULL),
+                "Failed in clGetEventProfilingInfo II");
         
-        clReleaseEvent (myEvent);
+        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
         tmpTime = endTime - startTime;
 
         if (((int) (((float) (tmpTime) / num) * 10)) <= kernelExecTimeNs) {
@@ -280,7 +300,8 @@ static void find_best_kpc(void) {
             optimal_kpc = num;
         }
         free(tmpbuffer);
-        clReleaseCommandQueue(queue_prof);
+        HANDLE_CLERROR(clReleaseCommandQueue(queue_prof), 
+            "Failed in clReleaseCommandQueue");
     }
     printf("Optimal keys per crypt %d\n", optimal_kpc);
     printf("to avoid this test on next run, put \""
@@ -501,9 +522,10 @@ static void crypt_all(int count) {
     HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0,
             sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL),
             "failed in clEnqueueWriteBuffer data_info");
-    HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_in, CL_FALSE, 0,
-            sizeof(crypt_sha512_password) * max_keys_per_crypt, plaintext, 0, NULL, NULL),
-            "failed in clEnqueueWriteBuffer buffer_in");
+    if (new_keys)
+        HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_in, CL_FALSE, 0,
+                sizeof(crypt_sha512_password) * max_keys_per_crypt, plaintext, 0, NULL, NULL),
+                "failed in clEnqueueWriteBuffer buffer_in");
 
     //Enqueue the kernel
     HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL,
@@ -517,6 +539,7 @@ static void crypt_all(int count) {
  
     //Do the work
     HANDLE_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");
+    new_keys = 0;
 }
 
 /* ------- Binary Hash functions group ------- */
-- 
1.7.5.4


Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ