Follow us on Twitter or via RSS feeds with tweets or complete announcement texts or excerpts
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Sun, 16 Jan 2011 19:45:42 +0100
From: Samuele Giovanni Tonon <samu@...uxasylum.net>
To: john-users <john-users@...ts.openwall.com>
Subject: raw-sha1-opencl patch 1

hello,
attached here patch 1 for raw-sha1 with many improvements:
- added fix for nvidia cl compiler now it should work on both ati and 
  nvidia (not tested on nvidia atm)
- fixed some faulty loops of previous version due to my 
  misunderstanding of crypt_all() and cmp_all() calls
- moved prepare of sha1 msg from cpu to GPU saving some data transfer

what still need to do:
- some more optimization of both cl and C code
- add md5 cl hack to pass 1/4 (in sha1 case 1/5) of the hash back to jtr
  and send the whole hash only for cmp_one

how to add to jtr:
add the patch to a vanilla jtr 1.7.6 *after* adding jumbo-9-patch and
opencl-9 patch .

benchmark:
I took a sample dictionary file and encoded sha1 twice with a simple
python program:

#!/usr/bin/python
import sys
import hashlib

fp = file(sys.argv[1],'r')
fw = file(sys.argv[2],'w')
for line in fp.readlines():
        x= hashlib.sha1()
        y= hashlib.sha1()
        x.update(line.strip())
        #print "%s"%line.strip()
        y.update(x.hexdigest())
        yy = y.hexdigest()
        fw.write(yy)
        fw.write("\n")

fp.close()
fw.close()

this to make an "unbreakable" hashfile to test with


$../run/john --format=raw-sha1 ~/ppp       
Loaded 9000 password hashes with no different salts (Raw SHA-1
[raw-sha1])
guesses: 0  time: 0:00:09:52 (3)  c/s: 29179M  trying: Momesoo
guesses: 0  time: 0:00:10:04 (3)  c/s: 29185M  trying: cafespe1
guesses: 0  time: 0:00:10:05 (3)  c/s: 29185M  trying: cbmteapp
Session aborted
$../run/john --format=raw-sha1-opencl ~/ppp       

OpenCL Platform: <<<ATI Stream>>> and device: <<<Juniper>>>
Loaded 9000 password hashes with no different salts (Raw SHA-1 OpenCL
[SHA-1])
guesses: 0  time: 0:00:02:24 (3)  c/s: 3786M  trying: trckfox7 -
tomcltco
guesses: 0  time: 0:00:09:26 (3)  c/s: 4037M  trying: padhco49 -
phouiouw
guesses: 0  time: 0:00:10:06 (3)  c/s: 4034M  trying: 00SW091 - 0ryohlm
guesses: 0  time: 0:00:10:09 (3)  c/s: 4033M  trying: pldiftt - assh26t


so still 6 times slower than raw sha1 cpu version but..
gpu wasn't used very much: i could still see my desktop with no load at
all, when i benchmark pyrit my gpu hangs for some seconds .

i've tried changing SHA_BLOCK - which is the fixed size i send to the
gpu for each password - from 16 to 64 . performance decreased by 20% no
more so it's no more a matter of low work_size and data exchange.

Cl code can be optimized, for example the prepare_msg routine 
is ugly, some Opencl reading could help a bit.


Regards
Samuele 



-- 
While various networks have become deeply rooted, and thoughts have been
sent out as light and electrons in a singular direction, this era has
yet to digitize/computerize to the degree necessary for individuals to
become a singular complex entity.
  KOUKAKU KIDOUTAI Stand Alone Complex


diff -urpN john-1.7.6/run/sha1_opencl_kernel.cl john-1.7.6.new/run/sha1_opencl_kernel.cl
--- john-1.7.6/run/sha1_opencl_kernel.cl	1970-01-01 01:00:00.000000000 +0100
+++ john-1.7.6.new/run/sha1_opencl_kernel.cl	2011-01-16 18:40:12.000000000 +0100
@@ -0,0 +1,202 @@
+/* 
+   This code was taken and merged from pyrit opencl sha1 routines royger's sample ( http://royger.org/opencl/?p=12) 
+   and largely inspired from md5_opencl_kernel.cl 
+*/
+
+#define K0  0x5A827999
+#define K1  0x6ED9EBA1
+#define K2  0x8F1BBCDC
+#define K3  0xCA62C1D6
+
+#define H1 0x67452301
+#define H2 0xEFCDAB89
+#define H3 0x98BADCFE
+#define H4 0x10325476
+#define H5 0xC3D2E1F0
+
+#define SHA_BLOCK 	16
+#define SSHA_NUM_KEYS 	2
+
+#ifndef uint32_t
+#define uint32_t unsigned int
+#endif
+
+
+typedef struct {
+    uint32_t h0,h1,h2,h3,h4;
+} SHA_DEV_CTX;
+
+void prepare_msg(__global uchar *s, char *dest) {
+    int i;
+    uint ulen;
+
+    for(i = 0; i < SHA_BLOCK && s[i] != 0x80 ; i++){
+        dest[i] = s[i];
+    }
+    ulen = (i * 8) & 0xFFFFFFFF;
+    dest[i] = (char) 0x80;
+    i=i+1;
+    for(;i<60;i++){
+	dest[i] = (char) 0;
+    }
+    dest[60] = ulen >> 24;
+    dest[61] = ulen >> 16;
+    dest[62] = ulen >> 8;
+    dest[63] = ulen;
+    
+    return;
+}
+
+__kernel void sha1_crypt_kernel(__global const char *plain_key,  __global SHA_DEV_CTX *digest){
+    int t, word_pad, gid, msg_pad;
+    uint W[80], temp, A,B,C,D,E;
+    uchar msg[64];
+    
+    gid = get_global_id(0);
+    word_pad = gid * 64;
+    msg_pad = gid * SHA_BLOCK;
+   
+
+    A = H1;
+    B = H2;
+    C = H3;
+    D = H4;
+    E = H5;
+    
+    prepare_msg(&plain_key[msg_pad],msg);
+    
+    for (t = 0; t < 16; t++){
+        W[t] = ((uchar) msg[ t * 4]) << 24;
+        W[t] |= ((uchar) msg[ t * 4 + 1]) << 16;
+        W[t] |= ((uchar) msg[ t * 4 + 2]) << 8;
+        W[t] |= (uchar) msg[ t * 4 + 3];
+    }
+
+#undef R
+#define R(t)                                              \
+(                                                         \
+    temp = W[(t -  3) & 0x0F] ^ W[(t - 8) & 0x0F] ^       \
+           W[(t - 14) & 0x0F] ^ W[ t      & 0x0F],        \
+    ( W[t & 0x0F] = rotate((int)temp,1) )                 \
+)
+
+#undef P
+#define P(a,b,c,d,e,x)                                    \
+{                                                         \
+    e += rotate((int)a,5) + F(b,c,d) + K + x; b = rotate((int)b,30);\
+}
+
+#define F(x,y,z) (z ^ (x & (y ^ z)))
+#define K 0x5A827999
+  
+  P( A, B, C, D, E, W[0]  );
+  P( E, A, B, C, D, W[1]  );
+  P( D, E, A, B, C, W[2]  );
+  P( C, D, E, A, B, W[3]  );
+  P( B, C, D, E, A, W[4]  );
+  P( A, B, C, D, E, W[5]  );
+  P( E, A, B, C, D, W[6]  );
+  P( D, E, A, B, C, W[7]  );
+  P( C, D, E, A, B, W[8]  );
+  P( B, C, D, E, A, W[9]  );
+  P( A, B, C, D, E, W[10] );
+  P( E, A, B, C, D, W[11] );
+  P( D, E, A, B, C, W[12] );
+  P( C, D, E, A, B, W[13] );
+  P( B, C, D, E, A, W[14] );
+  P( A, B, C, D, E, W[15] );
+  P( E, A, B, C, D, R(16) );
+  P( D, E, A, B, C, R(17) );
+  P( C, D, E, A, B, R(18) );
+  P( B, C, D, E, A, R(19) );
+
+#undef K
+#undef F
+
+#define F(x,y,z) (x ^ y ^ z)
+#define K 0x6ED9EBA1
+  
+  P( A, B, C, D, E, R(20) );
+  P( E, A, B, C, D, R(21) );
+  P( D, E, A, B, C, R(22) );
+  P( C, D, E, A, B, R(23) );
+  P( B, C, D, E, A, R(24) );
+  P( A, B, C, D, E, R(25) );
+  P( E, A, B, C, D, R(26) );
+  P( D, E, A, B, C, R(27) );
+  P( C, D, E, A, B, R(28) );
+  P( B, C, D, E, A, R(29) );
+  P( A, B, C, D, E, R(30) );
+  P( E, A, B, C, D, R(31) );
+  P( D, E, A, B, C, R(32) );
+  P( C, D, E, A, B, R(33) );
+  P( B, C, D, E, A, R(34) );
+  P( A, B, C, D, E, R(35) );
+  P( E, A, B, C, D, R(36) );
+  P( D, E, A, B, C, R(37) );
+  P( C, D, E, A, B, R(38) );
+  P( B, C, D, E, A, R(39) );
+  
+#undef K
+#undef F
+  
+#define F(x,y,z) ((x & y) | (z & (x | y)))
+#define K 0x8F1BBCDC
+  
+  P( A, B, C, D, E, R(40) );
+  P( E, A, B, C, D, R(41) );
+  P( D, E, A, B, C, R(42) );
+  P( C, D, E, A, B, R(43) );
+  P( B, C, D, E, A, R(44) );
+  P( A, B, C, D, E, R(45) );
+  P( E, A, B, C, D, R(46) );
+  P( D, E, A, B, C, R(47) );
+  P( C, D, E, A, B, R(48) );
+  P( B, C, D, E, A, R(49) );
+  P( A, B, C, D, E, R(50) );
+  P( E, A, B, C, D, R(51) );
+  P( D, E, A, B, C, R(52) );
+  P( C, D, E, A, B, R(53) );
+  P( B, C, D, E, A, R(54) );
+  P( A, B, C, D, E, R(55) );
+  P( E, A, B, C, D, R(56) );
+  P( D, E, A, B, C, R(57) );
+  P( C, D, E, A, B, R(58) );
+  P( B, C, D, E, A, R(59) );
+  
+#undef K
+#undef F
+
+#define F(x,y,z) (x ^ y ^ z)
+#define K 0xCA62C1D6
+  
+  P( A, B, C, D, E, R(60) );
+  P( E, A, B, C, D, R(61) );
+  P( D, E, A, B, C, R(62) );
+  P( C, D, E, A, B, R(63) );
+  P( B, C, D, E, A, R(64) );
+  P( A, B, C, D, E, R(65) );
+  P( E, A, B, C, D, R(66) );
+  P( D, E, A, B, C, R(67) );
+  P( C, D, E, A, B, R(68) );
+  P( B, C, D, E, A, R(69) );
+  P( A, B, C, D, E, R(70) );
+  P( E, A, B, C, D, R(71) );
+  P( D, E, A, B, C, R(72) );
+  P( C, D, E, A, B, R(73) );
+  P( B, C, D, E, A, R(74) );
+  P( A, B, C, D, E, R(75) );
+  P( E, A, B, C, D, R(76) );
+  P( D, E, A, B, C, R(77) );
+  P( C, D, E, A, B, R(78) );
+  P( B, C, D, E, A, R(79) );
+
+#undef K
+#undef F
+    
+    digest[gid].h0 = as_uint(as_uchar4(A + H1).wzyx);
+    digest[gid].h1 = as_uint(as_uchar4(B + H2).wzyx);
+    digest[gid].h2 = as_uint(as_uchar4(C + H3).wzyx);
+    digest[gid].h3 = as_uint(as_uchar4(D + H4).wzyx);
+    digest[gid].h4 = as_uint(as_uchar4(E + H5).wzyx);
+}
diff -urpN john-1.7.6/src/Makefile john-1.7.6.new/src/Makefile
--- john-1.7.6/src/Makefile	2011-01-16 17:13:42.000000000 +0100
+++ john-1.7.6.new/src/Makefile	2011-01-16 17:11:43.000000000 +0100
@@ -23,11 +23,11 @@ OMPFLAGS =
 #OMPFLAGS = -xopenmp
 # MD4, MD5 and OpenCL debugging
 #DEBUG = -DDEBUG
-CFLAGS = -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG)
-#CFLAGS = -g -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG)
+#CFLAGS = -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG)
+CFLAGS = -g -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG)
 # -DHAVE_SKEY
 ASFLAGS = -c $(OMPFLAGS)
-LDFLAGS = -L/usr/local/lib -L/usr/local/ssl/lib -lcrypto -lm $(OMPFLAGS)
+LDFLAGS = -L/usr/local/lib -L/usr/local/ssl/lib -L$(ATISTREAMSDKROOT)/lib/x86_64 -lcrypto -lm $(OMPFLAGS)
 #LDFLAGS = -s -L/usr/local/lib -L/usr/local/ssl/lib -lcrypto -lm $(OMPFLAGS)
 # -lskey
 LDFLAGS_SOLARIS = -lrt -lnsl -lsocket
@@ -62,6 +62,7 @@ JOHN_OBJS = \
 	hmacMD5_fmt.o \
 	IPB2_fmt.o \
 	rawSHA1_fmt.o \
+	rawSHA1_opencl_fmt.o \
 	sha1_gen_fmt.o \
 	rawMD4_fmt.o \
 	md4_gen_fmt.o \
diff -urpN john-1.7.6/src/john.c john-1.7.6.new/src/john.c
--- john-1.7.6/src/john.c	2011-01-16 17:13:42.000000000 +0100
+++ john-1.7.6.new/src/john.c	2011-01-16 17:12:03.000000000 +0100
@@ -47,6 +47,8 @@ extern struct fmt_main fmt_AFS, fmt_LM;
 extern struct fmt_main fmt_crypt;
 #endif
 
+//extern struct fmt_main fmt_opencl_NSLDAPS;
+extern struct fmt_main fmt_opencl_rawSHA1;
 extern struct fmt_main fmt_NT, fmt_XSHA;
 extern struct fmt_main fmt_PO;
 extern struct fmt_main fmt_rawMD5go;
@@ -125,6 +127,8 @@ static void john_register_all(void)
 	john_register_one(&fmt_AFS);
 	john_register_one(&fmt_LM);
 
+	//john_register_one(&fmt_opencl_NSLDAPS);
+	john_register_one(&fmt_opencl_rawSHA1);
 	john_register_one(&fmt_NT);
 	john_register_one(&fmt_XSHA);
 	john_register_one(&fmt_mscash);
diff -urpN john-1.7.6/src/rawSHA1_opencl_fmt.c john-1.7.6.new/src/rawSHA1_opencl_fmt.c
--- john-1.7.6/src/rawSHA1_opencl_fmt.c	1970-01-01 01:00:00.000000000 +0100
+++ john-1.7.6.new/src/rawSHA1_opencl_fmt.c	2011-01-16 18:39:59.000000000 +0100
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2011 Samuele Giovanni Tonon
+ * samu at linuxasylum dot net
+ * Released under GPL license 
+ */
+
+#include <string.h>
+
+#include "path.h"
+#include "arch.h"
+#include "misc.h"
+#include "common.h"
+#include "formats.h"
+#include "sha.h"
+
+#define FORMAT_LABEL			"raw-sha1-opencl"
+#define FORMAT_NAME			"Raw SHA-1 OpenCL"
+#define ALGORITHM_NAME			"raw-sha1-opencl"
+#define SHA_TYPE                        "SHA-1"
+#define BENCHMARK_COMMENT		""
+#define BENCHMARK_LENGTH		0
+
+#define PLAINTEXT_LENGTH		32
+#define CIPHERTEXT_LENGTH		40
+
+#define BINARY_SIZE			20
+#define SALT_SIZE			0
+
+
+#define SHA_BLOCK			16
+#define SSHA_NUM_KEYS         		1024*2048
+
+#define MIN_KEYS_PER_CRYPT		SSHA_NUM_KEYS
+#define MAX_KEYS_PER_CRYPT		SSHA_NUM_KEYS
+
+#ifndef uint32_t
+#define uint32_t unsigned int
+#endif
+
+typedef struct {
+    uint32_t h0,h1,h2,h3,h4;
+} SHA_DEV_CTX;
+
+
+cl_platform_id platform;
+cl_device_id devices;
+cl_context context;
+cl_program program;
+cl_command_queue queue;
+cl_int ret_code;
+cl_kernel sha1_crypt_kernel;
+cl_mem pinned_saved_keys, pinned_partial_hashes, buffer_out, buffer_keys, buffer_hash, len_buffer;
+static SHA_DEV_CTX *outbuffer;
+static char *inbuffer;
+static size_t global_work_size = SSHA_NUM_KEYS;
+static size_t local_work_size = 256;
+//static size_t local_work_size = 1;
+
+static struct fmt_tests rawsha1_tests[] = {
+	{"a9993e364706816aba3e25717850c26c9cd0d89d", "abc"},
+	{"2fbf0eba37de1d1d633bc1ed943b907f9b360d4c", "azertyuiop1"},
+	{"f879f8090e92232ed07092ebed6dc6170457a21d", "azertyuiop2"},
+	{"1813c12f25e64931f3833b26e999e26e81f9ad24", "azertyuiop3"},
+	{NULL}
+};
+
+static char saved_key[SSHA_NUM_KEYS][PLAINTEXT_LENGTH];
+
+static void if_error_log(cl_int ret_code, const char *message)
+{
+    if(ret_code != CL_SUCCESS) {
+        printf("\nOpenCL: %s\n", message);
+        exit(-1);
+    }
+}
+
+static int valid(char *ciphertext)
+{
+	int i;
+
+	if (strlen(ciphertext) != CIPHERTEXT_LENGTH) return 0;
+	for (i = 0; i < CIPHERTEXT_LENGTH; i++){
+		if (!(  (('0' <= ciphertext[i])&&(ciphertext[i] <= '9')) ||
+					(('a' <= ciphertext[i])&&(ciphertext[i] <= 'f'))  
+					|| (('A' <= ciphertext[i])&&(ciphertext[i] <= 'F'))))
+			return 0;
+	}
+	return 1;
+}
+
+static void rawsha1_set_salt(void *salt) { }
+
+static void rawsha1_opencl_init(void)
+{
+    // load kernel source
+    char *source=(char*)mem_alloc(1024*16);
+    FILE *fp = fopen(path_expand("$JOHN/sha1_opencl_kernel.cl"),"r");
+    if(!fp)
+        if_error_log(!CL_SUCCESS, "Source kernel not found!");
+    size_t source_size = fread(source, sizeof(char), 1024*16, fp);
+    source[source_size] = 0;
+    fclose(fp);
+
+    // get a platform and its information
+    size_t max_group_size;
+    char log[1024*64];
+    ret_code = clGetPlatformIDs(1, &platform, NULL);
+    if_error_log(ret_code, "No OpenCL platform exist");
+    ret_code = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(log), log, NULL);
+    if_error_log(ret_code, "Error querying PLATFORM_NAME");
+    printf("\nOpenCL Platform: <<<%s>>>", log);
+
+    // find an OpenCL device
+    //ret_code = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &devices, NULL);
+    ret_code = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &devices, NULL);
+    if_error_log(ret_code, "No OpenCL device of that type exist");
+    ret_code = clGetDeviceInfo(devices, CL_DEVICE_NAME, sizeof(log), log, NULL);
+    if_error_log(ret_code, "Error querying DEVICE_NAME");
+    printf(" and device: <<<%s>>>\n",log);
+    ret_code = clGetDeviceInfo(devices, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_group_size), &max_group_size, NULL);
+    if_error_log(ret_code, "Error querying MAX_WORK_GROUP_SIZE");
+
+    // create a context and command queue on the device.
+    context = clCreateContext(NULL, 1, &devices, NULL, NULL, &ret_code);
+    if_error_log(ret_code, "Error creating context");
+    queue = clCreateCommandQueue(context, devices, 0, &ret_code);
+    if_error_log(ret_code, "Error creating command queue");
+
+    // submit the kernel source for compilation
+    program = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &ret_code);
+    if_error_log(ret_code,"Error creating program");
+    ret_code = clBuildProgram(program, 1, &devices, NULL, NULL, NULL);
+    if(ret_code != CL_SUCCESS) {
+        printf("failed in clBuildProgram with %d\n", ret_code);
+        clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, sizeof(log), (void*)log, NULL);
+        printf("compilation log: %s\n", log);
+        exit(-1);
+    }
+
+    // create kernel to execute
+    sha1_crypt_kernel = clCreateKernel(program, "sha1_crypt_kernel", &ret_code);
+    if_error_log(ret_code, "Error creating kernel. Double-check kernel name?");
+
+    // create Page-Locked (Pinned) memory for higher bandwidth between host and device (Nvidia Best Practices)
+    pinned_saved_keys = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, (SHA_BLOCK)*SSHA_NUM_KEYS, NULL, &ret_code);
+    if_error_log (ret_code, "Error creating page-locked memory");
+    inbuffer = (char*)clEnqueueMapBuffer(queue, pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, (SHA_BLOCK)*SSHA_NUM_KEYS, 0, NULL, NULL, &ret_code);
+    if_error_log (ret_code, "Error mapping page-locked memory inbuffer");
+
+    memset(inbuffer,0,SHA_BLOCK*SSHA_NUM_KEYS);
+
+
+    pinned_partial_hashes = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, NULL, &ret_code);
+    if_error_log (ret_code, "Error creating page-locked memory");
+
+    outbuffer = (SHA_DEV_CTX *)clEnqueueMapBuffer(queue, pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, 0, NULL, NULL, &ret_code);
+    if_error_log (ret_code, "Error mapping page-locked memory outbuffer");
+
+    // create and set arguments
+    buffer_keys = clCreateBuffer(context, CL_MEM_READ_ONLY, (SHA_BLOCK)*SSHA_NUM_KEYS, NULL, &ret_code);
+    if_error_log (ret_code, "Error creating buffer keys argument");
+
+    buffer_out  = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, NULL, &ret_code);
+    if_error_log (ret_code,"Error creating buffer out argument");
+
+    ret_code = clSetKernelArg(sha1_crypt_kernel, 0, sizeof(buffer_keys), (void*) &buffer_keys);
+    if_error_log (ret_code, "Error setting argument 1");
+
+    ret_code = clSetKernelArg(sha1_crypt_kernel, 1, sizeof(buffer_out ), (void*) &buffer_out);
+    if_error_log (ret_code, "Error setting argument 3");
+}
+
+static void rawsha1_set_key(char *key, int index) {
+	int lenpwd;
+	
+        memset(saved_key[index],0,PLAINTEXT_LENGTH);
+	
+	strnzcpy(saved_key[index], key, PLAINTEXT_LENGTH);
+	lenpwd = strlen(saved_key[index]);
+        
+	memcpy(&(inbuffer[index*SHA_BLOCK]),saved_key[index],SHA_BLOCK);
+        inbuffer[index*SHA_BLOCK+lenpwd] = 0x80;
+	//printf("key=%s index=%d\n",saved_key[index],index);
+}
+
+static char *rawsha1_get_key(int index) {
+	return saved_key[index];
+}
+
+static int rawsha1_cmp_all(void *binary, int count) { 
+	unsigned int i = 0;
+	unsigned int b = ((unsigned int *)binary)[0];
+
+	for(; i<count; i++)
+		if(b==outbuffer[i].h0)
+			return 1;
+	return 0;
+}
+
+static int rawsha1_cmp_exact(char *source, int count){
+  	return (1);
+}
+
+static int rawsha1_cmp_one(void * binary, int index)
+{
+    unsigned int *t=(unsigned int *)binary;
+
+    // h1
+    if (t[1]!=outbuffer[index].h1)
+        return 0;
+    // h2
+    if (t[2]!=outbuffer[index].h2)
+        return 0;
+    // h3
+    if (t[3]!=outbuffer[index].h3)
+        return 0;
+    // h4
+    return t[4]==outbuffer[index].h4;
+	
+}
+
+static void rawsha1_crypt_all(int count) {  
+        cl_int code;
+	int i,k;
+
+	code = clEnqueueWriteBuffer(queue, buffer_keys, CL_TRUE, 0, (SHA_BLOCK) * SSHA_NUM_KEYS, inbuffer, 0, NULL, NULL);
+	if(code != CL_SUCCESS) {
+		printf("failed in clEnqueueWriteBuffer inbuffer with code %d\n", code);
+		exit(-1);
+	}
+	// execute ssha kernel
+	code = clEnqueueNDRangeKernel(queue, sha1_crypt_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
+	if(code != CL_SUCCESS) {
+		printf("failed in clEnqueueNDRangeKernel with code %d\n", code);
+		exit(-1);
+	}
+	clFinish(queue);
+	// read back partial hashes
+	clEnqueueReadBuffer(queue, buffer_out, CL_TRUE, 0, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, outbuffer, 0, NULL, NULL);
+	/*for(i=0;i<count;i++){
+		printf(" %x ",outbuffer[i].h0);
+		printf(" %x ",outbuffer[i].h1);
+		printf(" %x ",outbuffer[i].h2);
+		printf(" %x ",outbuffer[i].h3);
+		printf(" %x ",outbuffer[i].h4);
+		printf("\n");
+	} */
+}
+
+static void * rawsha1_binary(char *ciphertext) 
+{
+	static char realcipher[BINARY_SIZE];
+	int i;
+	
+	for(i=0;i<BINARY_SIZE;i++) {
+		realcipher[i] = atoi16[ARCH_INDEX(ciphertext[i*2])]*16 + atoi16[ARCH_INDEX(ciphertext[i*2+1])];
+	}
+	return (void *)realcipher;
+}
+
+static int binary_hash_0(void *binary){
+	//printf("bin_hash0=%x\n",((ARCH_WORD_32*)binary)[0]);
+	return ((ARCH_WORD_32 *)binary)[0] & 0xF;
+}
+
+static int binary_hash_1(void *binary){
+	return ((ARCH_WORD_32 *)binary)[0] & 0xFF;
+}
+
+static int binary_hash_2(void *binary){
+	return ((ARCH_WORD_32 *)binary)[0] & 0xFFF;
+}
+
+static int get_hash_0(int index){
+	//printf("get_hash0=%x\n",outbuffer[index].h0);
+	return outbuffer[index].h0 & 0xF;
+}
+
+static int get_hash_1(int index) {
+	return outbuffer[index].h0 & 0xFF;
+}
+
+static int get_hash_2(int index){
+	return outbuffer[index].h0 & 0xFFF;
+}
+
+struct fmt_main fmt_opencl_rawSHA1 = {
+	{
+		FORMAT_LABEL,
+		FORMAT_NAME,
+		SHA_TYPE,
+		BENCHMARK_COMMENT,
+		BENCHMARK_LENGTH,
+		PLAINTEXT_LENGTH,
+		BINARY_SIZE,
+		SALT_SIZE,
+		MIN_KEYS_PER_CRYPT,
+		MAX_KEYS_PER_CRYPT,
+		FMT_CASE | FMT_8_BIT,
+		rawsha1_tests
+	}, {
+		rawsha1_opencl_init,
+		valid,
+		fmt_default_split,
+		rawsha1_binary,
+		fmt_default_salt,
+		{
+			binary_hash_0,
+			binary_hash_1,
+			binary_hash_2,
+			NULL,
+			NULL
+		},
+		fmt_default_salt_hash,
+		rawsha1_set_salt,
+		rawsha1_set_key,
+		rawsha1_get_key,
+		fmt_default_clear_keys,
+		rawsha1_crypt_all,
+		{
+			get_hash_0,
+			get_hash_1,
+			get_hash_2,
+			NULL,
+			NULL
+		},
+		rawsha1_cmp_all,
+		rawsha1_cmp_one,
+		rawsha1_cmp_exact
+	}
+};

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux - Powered by OpenVZ