From 782e8fc029ee1fa66fe138c4e171847e050322b5 Mon Sep 17 00:00:00 2001 From: sayantan Date: Thu, 12 Apr 2012 22:34:42 +0530 Subject: [PATCH] 13% performance improvement on opencl_mscash2 , added new prepare() function --- src/opencl/pbkdf2_kernel.cl | 2390 ++++++++++++++++++++++--------------------- src/opencl_MSCASH2_fmt.c | 580 ++++++++---- 2 files changed, 1653 insertions(+), 1317 deletions(-) mode change 100755 => 100644 src/opencl/pbkdf2_kernel.cl mode change 100755 => 100644 src/opencl_MSCASH2_fmt.c diff --git a/src/opencl/pbkdf2_kernel.cl b/src/opencl/pbkdf2_kernel.cl old mode 100755 new mode 100644 index 2b70e51..342029c --- a/src/opencl/pbkdf2_kernel.cl +++ b/src/opencl/pbkdf2_kernel.cl @@ -6,1310 +6,1416 @@ */ #define ITERATIONS 10240 - + #define INIT_MD4_A 0x67452301 #define INIT_MD4_B 0xefcdab89 #define INIT_MD4_C 0x98badcfe #define INIT_MD4_D 0x10325476 - + #define SQRT_2 0x5a827999 #define SQRT_3 0x6ed9eba1 - + #define SHA1_DIGEST_LENGTH 20 - + #define INIT_SHA1_A 0x67452301 #define INIT_SHA1_B 0xEFCDAB89 #define INIT_SHA1_C 0x98BADCFE #define INIT_SHA1_D 0x10325476 #define INIT_SHA1_E 0xC3D2E1F0 - + + #ifndef GET_WORD_32_BE #define GET_WORD_32_BE(n,b,i) \ { \ - (n) = ( (unsigned long) ((b)[(i) ]&0x000000ff) <<24 )\ + (n) = ( (unsigned long) (b)[(i) ] <<24 )\ | ( (unsigned long) ((b)[(i) ]&0x0000ff00) << 8 )\ | ( (unsigned long) ((b)[(i) ]&0x00ff0000) >> 8 )\ - | ( (unsigned long) ((b)[(i) ]&0xff000000) >>24 );\ + | ( (unsigned long) (b)[(i) ] >>24 );\ } #endif - + #ifndef PUT_WORD_32_BE #define PUT_WORD_32_BE(n,b,i) \ { \ (b)[(i) ] = ((unsigned char) ( (n) >> 24 ))|((unsigned char) ( (n) >> 16 ))<<8|((unsigned char) ( (n) >> 8 ))<<16|((unsigned char) ( (n) ))<<24; \ } #endif - -#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) - -#define R(t) \ + +#define S5(x) ((x << 5) | ((x ) >> 27)) + +#define S30(x) ((x << 30) | ((x ) >> 2)) + +#define R0 \ ( \ - temp = W[(t - 3) & 0x0F] ^ W[(t - 8) & 0x0F] ^ \ - W[(t - 14) & 0x0F] ^ W[ t & 0x0F], \ - ( W[t & 0x0F] = S(temp,1) ) \ + W.s0 = rotate((int)(W.sd ^ W.s8 ^ W.s2 ^ W.s0),1) \ ) - + +#define R1 \ +( \ + W.s1 = rotate((int)(W.se ^ W.s9 ^ W.s3 ^ W.s1),1) \ +) + +#define R2 \ +( \ + W.s2 = rotate((int)(W.sf ^ W.sa ^ W.s4 ^ W.s2),1) \ +) + +#define R3 \ +( \ + W.s3 = rotate((int)(W.s0 ^ W.sb ^ W.s5 ^ W.s3),1) \ +) + +#define R4 \ +( \ + W.s4 = rotate((int)(W.s1 ^ W.sc ^ W.s6 ^ W.s4),1) \ +) + +#define R5 \ +( \ + W.s5 = rotate((int)(W.s2 ^ W.sd ^ W.s7 ^ W.s5),1) \ +) + +#define R6 \ +( \ + W.s6 = rotate((int)(W.s3 ^ W.se ^ W.s8 ^ W.s6),1 ) \ +) + +#define R7 \ +( \ + W.s7 = rotate((int)(W.s4 ^ W.sf ^ W.s9 ^ W.s7),1) \ +) + +#define R8 \ +( \ + W.s8 = rotate((int)( W.s5 ^ W.s0 ^ W.sa ^ W.s8 ),1 ) \ +) + +#define R9 \ +( \ + W.s9 = rotate((int)(W.s6 ^ W.s1 ^ W.sb ^ W.s9 ),1) \ +) + +#define RA \ +( \ + W.sa = rotate((int)(W.s7 ^ W.s2 ^ W.sc ^ W.sa ),1 ) \ +) + +#define RB \ +( \ + W.sb = rotate((int)(W.s8 ^ W.s3 ^ W.sd ^ W.sb ),1 ) \ +) + +#define RC \ +( \ + W.sc = rotate((int)(W.s9 ^ W.s4 ^ W.se ^ W.sc ),1 ) \ +) + +#define RD \ +( \ + W.sd = rotate((int)( W.sa ^ W.s5 ^ W.sf ^ W.sd ),1 ) \ +) + +#define RE \ +( \ + W.se = rotate((int)(W.sb ^ W.s6 ^ W.s0 ^ W.se ),1 ) \ +) + +#define RF \ +( \ + W.sf = rotate((int)(W.sc ^ W.s7 ^ W.s1 ^ W.sf ),1 ) \ +) + #define P(a,b,c,d,e,x) \ { \ - e += S(a,5) + F(b,c,d) + K + x; b = S(b,30); \ + e += rotate((int)a,5) + F(b,c,d) + K + x; b = rotate((int)b,30); \ } +__kernel __attribute__ ((reqd_work_group_size(64, 1, 1))) +void PBKDF2 ( const __global unsigned int *pass_global, + const __global unsigned int *salt, + int usrlen, + uint num_keys, + __global unsigned int *out_global) +{ + int lid = get_local_id(0); + + int id = get_global_id(0); + + unsigned int i, j, k; + + __local unsigned int salt_local[32], out[4 * 64]; + + if (lid == 0) + for (i = 0; i <= usrlen / 2; ++i) + salt_local[i] = salt[i]; + + k = 4 * lid; + + for (i = 0; i < 4; ++i) + out[k + i] = 0; + + barrier(CLK_LOCAL_MEM_FENCE); + + unsigned int pass[4]; + + unsigned int buf[16] = { 0 }; + + uint temp_char[SHA1_DIGEST_LENGTH / 4]; + + uint16 W; + + unsigned int A, B, C, D, E, state[5]; + + unsigned int ipad[16]; + + unsigned int opad[16]; + for (i = id, j = 0; i < 4 * num_keys; i = i + num_keys, j++) + pass[j] = pass_global[i]; + + if (usrlen % 2 == 1) { + for (i = 0; i <= usrlen / 2; i++) + buf[i] = salt_local[i]; + buf[(usrlen / 2) + 1] = 0x01 << 8; + } + + else { + for (i = 0; i < usrlen / 2; i++) + buf[i] = salt_local[i]; + buf[usrlen / 2] = 0x01 << 24; + } + + + for (i = 0; i < 16; i++) { + ipad[i] = 0x36363636; + opad[i] = 0x5C5C5C5C; + } + + + + // step 1: append zeros to the end of K to create a B Byte string + + if (usrlen % 2 == 1) + buf[usrlen / 2 + 1] = 0x80 << 16 | buf[usrlen / 2 + 1]; + else + buf[usrlen / 2 + 1] = 0x80 | buf[usrlen / 2 + 1]; + + + + PUT_WORD_32_BE((64 + usrlen * 2 + 4) << 3, buf, 60 / 4); + + + // step 2: XOR (bitwise exclusive-OR) the B byte string computed in step 1 with ipad + // step 5: XOR (bitwise exclusive-OR) the B byte string computed in step 1 with opad + + + for (j = 0; j < 4; j++) { + ipad[j] = ipad[j] ^ pass[j]; + opad[j] = opad[j] ^ pass[j]; + } + + // step 3: append the stream of data 'text' to the B byte sting resulting from step 2 + // first part of stream (64 bytes) is ipad, second part of stream (64 bytes) is buf + // step 4: apply H to the stream (ipad & buf) generated in step 3 + + GET_WORD_32_BE(W.s0, ipad, 0); + GET_WORD_32_BE(W.s1, ipad, 1); + GET_WORD_32_BE(W.s2, ipad, 2); + GET_WORD_32_BE(W.s3, ipad, 3); + GET_WORD_32_BE(W.s4, ipad, 4); + GET_WORD_32_BE(W.s5, ipad, 5); + GET_WORD_32_BE(W.s6, ipad, 6); + GET_WORD_32_BE(W.s7, ipad, 7); + GET_WORD_32_BE(W.s8, ipad, 8); + GET_WORD_32_BE(W.s9, ipad, 9); + GET_WORD_32_BE(W.sa, ipad, 10); + GET_WORD_32_BE(W.sb, ipad, 11); + GET_WORD_32_BE(W.sc, ipad, 12); + GET_WORD_32_BE(W.sd, ipad, 13); + GET_WORD_32_BE(W.se, ipad, 14); + GET_WORD_32_BE(W.sf, ipad, 15); + + + A = INIT_SHA1_A; + B = INIT_SHA1_B; + C = INIT_SHA1_C; + D = INIT_SHA1_D; + E = INIT_SHA1_E; -__kernel void PBKDF2(const __global unsigned int *pass_global, const __global unsigned int *salt, int usrlen, __global unsigned int *out_global) -{ int id=get_global_id(0); - unsigned int temp_char[SHA1_DIGEST_LENGTH/4],pass[4],out[4]; - unsigned int buf[16]={0}; - unsigned int i,j; - unsigned int temp, W[16]; - unsigned int A, B, C, D, E, state[5]; - unsigned int ipad[16]; - unsigned int opad[16]; - for(i=4*id,j=0;i<4*id+4;i++,j++) - { pass[j]=pass_global[i]; - out[j]=out_global[i]; - } - - if(usrlen%2==1) - { for(i=0;i<=usrlen/2;i++) - buf[i]=salt[i]; - buf[(usrlen/2)+1] = 0x01<<8; - } - else - { for(i=0;i -#include -#include -#include"common-opencl.h" +#include "common.h" +#include +#include +#include +#include +#include "common-opencl.h" #define INIT_MD4_A 0x67452301 + #define INIT_MD4_B 0xefcdab89 + #define INIT_MD4_C 0x98badcfe + #define INIT_MD4_D 0x10325476 #define SQRT_2 0x5a827999 + #define SQRT_3 0x6ed9eba1 -#define FORMAT_LABEL "mscash2-opencl" -#define FORMAT_NAME "MSCASH2-OPENCL" -#define KERNEL_NAME "PBKDF2" +#define FORMAT_LABEL "mscash2-opencl" + +#define FORMAT_NAME "MSCASH2-OPENCL" + +#define KERNEL_NAME "PBKDF2" + +#define ALGORITHM_NAME "PBKDF2_HMAC_SHA1" + + +#define BENCHMARK_COMMENT "" + +#define BENCHMARK_LENGTH -1 + -#define ALGORITHM_NAME "PBKDF2_HMAC_SHA1" +#define MSCASH2_PREFIX "$DCC2$" -#define BENCHMARK_COMMENT "" -#define BENCHMARK_LENGTH -1 -#define MSCASH2_PREFIX "$DCC2$" +#define MAX_KEYS_PER_CRYPT 64000 -#define MAX_KEYS_PER_CRYPT 800*80 -#define MIN_KEYS_PER_CRYPT 800*80 +#define MIN_KEYS_PER_CRYPT 64000 -#define MAX_SALT_LENGTH 15 //LENGTH OF SALT IN ASCII BEFORE CONVERTING TO TO UNICODE -#define MAX_PLAINTEXT_LENGTH 20 -#define MAX_CIPHERTEXT_LENGTH 54 //7 + MAX_SALT_LENGTH + 32 -#define BINARY_SIZE 16 +#define MAX_SALT_LENGTH 15 //LENGTH OF SALT IN ASCII BEFORE CONVERTING TO UNICODE + +#define MAX_PLAINTEXT_LENGTH 20 + +#define MAX_CIPHERTEXT_LENGTH 54 //7 + MAX_SALT_LENGTH + 32 + + +#define BINARY_SIZE 16 + # define SWAP(n) \ (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) + typedef struct { unsigned char username[MAX_SALT_LENGTH+1]; unsigned int length; } ms_cash2_salt; -//CUDA MSCASH2 IMPLEMENTATION +//TAKEN FROM CUDA MSCASH2 IMPLEMENTATION static struct fmt_tests tests[] = { - //{"$DCC2$test#a86012faf7d88d1fc037a69764a92cac", "password"}, - {"$DCC2$administrator#a150f71752b5d605ef0b2a1e98945611","a"}, - //{"$DCC2$administrator#c14eb8279e4233ec14e9d393637b65e2","ab"}, - //{"$DCC2$administrator#8ce9c0279b4e6f226f52d559f9c2c5f3","abc"}, - //{"$DCC2$administrator#2fc788d09fad7e26a92d12356fa44bdf","abcd"}, - //{"$DCC2$administrator#6aa19842ffea11f0f0c89f8ca8d245bd","abcde"}, + {"$DCC2$test#a86012faf7d88d1fc037a69764a92cac", "password"}, + {"$DCC2$administrator#a150f71752b5d605ef0b2a1e98945611","a"}, + {"$DCC2$administrator#c14eb8279e4233ec14e9d393637b65e2","ab"}, + {"$DCC2$administrator#8ce9c0279b4e6f226f52d559f9c2c5f3","abc"}, + {"$DCC2$administrator#2fc788d09fad7e26a92d12356fa44bdf","abcd"}, + {"$DCC2$administrator#6aa19842ffea11f0f0c89f8ca8d245bd","abcde"}, {NULL} }; - cl_uint *dcc_hash_host; - cl_uint *dcc2_hash_host; - unsigned char key_host[MAX_KEYS_PER_CRYPT][MAX_PLAINTEXT_LENGTH+1]; - unsigned char ciphertext_host[MAX_KEYS_PER_CRYPT][MAX_CIPHERTEXT_LENGTH+1]; - ms_cash2_salt currentsalt; - cl_platform_id pltfrmid; - cl_device_id devid[1]; - cl_context cntxt; - cl_command_queue cmdq; - cl_program prg; - cl_kernel krnl0; - cl_int err; + + cl_uint *dcc_hash_host; + + cl_uint *dcc_hash_host_temp; + + cl_uint *dcc2_hash_host; + + unsigned int current_numkeys; + + unsigned char key_host[MAX_KEYS_PER_CRYPT][MAX_PLAINTEXT_LENGTH+1]; + + unsigned char ciphertext_host[MAX_KEYS_PER_CRYPT][MAX_CIPHERTEXT_LENGTH+1]; + + ms_cash2_salt currentsalt; + + cl_platform_id pltfrmid; + + cl_device_id devid[1]; + + cl_context cntxt; + + cl_command_queue cmdq; + + cl_program prg; + + cl_kernel krnl0; + + cl_int err; @@ -78,8 +113,11 @@ static struct fmt_tests tests[] = { void md4_crypt(unsigned int *buffer, unsigned int *hash) { unsigned int a; + unsigned int b; + unsigned int c; + unsigned int d; // round 1 @@ -88,278 +126,417 @@ void md4_crypt(unsigned int *buffer, unsigned int *hash) c = INIT_MD4_C + (INIT_MD4_B ^ (d & (a ^ INIT_MD4_B))) + buffer[2]; c = (c << 11) | (c >> 21); b = INIT_MD4_B + (a ^ (c & (d ^ a))) + buffer[3]; b = (b << 19) | (b >> 13); + a += (d ^ (b & (c ^ d))) + buffer[4]; a = (a << 3 ) | (a >> 29); d += (c ^ (a & (b ^ c))) + buffer[5]; d = (d << 7 ) | (d >> 25); c += (b ^ (d & (a ^ b))) + buffer[6]; c = (c << 11) | (c >> 21); b += (a ^ (c & (d ^ a))) + buffer[7]; b = (b << 19) | (b >> 13); + a += (d ^ (b & (c ^ d))) + buffer[8] ; a = (a << 3 ) | (a >> 29); d += (c ^ (a & (b ^ c))) + buffer[9] ; d = (d << 7 ) | (d >> 25); c += (b ^ (d & (a ^ b))) + buffer[10]; c = (c << 11) | (c >> 21); b += (a ^ (c & (d ^ a))) + buffer[11]; b = (b << 19) | (b >> 13); + a += (d ^ (b & (c ^ d))) + buffer[12]; a = (a << 3 ) | (a >> 29); d += (c ^ (a & (b ^ c))) + buffer[13]; d = (d << 7 ) | (d >> 25); c += (b ^ (d & (a ^ b))) + buffer[14]; c = (c << 11) | (c >> 21); b += (a ^ (c & (d ^ a))) + buffer[15]; b = (b << 19) | (b >> 13); // round 2 + a += ((b & (c | d)) | (c & d)) + buffer[0] + SQRT_2; a = (a<<3 ) | (a>>29); d += ((a & (b | c)) | (b & c)) + buffer[4] + SQRT_2; d = (d<<5 ) | (d>>27); c += ((d & (a | b)) | (a & b)) + buffer[8] + SQRT_2; c = (c<<9 ) | (c>>23); b += ((c & (d | a)) | (d & a)) + buffer[12] + SQRT_2; b = (b<<13) | (b>>19); + a += ((b & (c | d)) | (c & d)) + buffer[1] + SQRT_2; a = (a<<3 ) | (a>>29); d += ((a & (b | c)) | (b & c)) + buffer[5] + SQRT_2; d = (d<<5 ) | (d>>27); c += ((d & (a | b)) | (a & b)) + buffer[9] + SQRT_2; c = (c<<9 ) | (c>>23); b += ((c & (d | a)) | (d & a)) + buffer[13] + SQRT_2; b = (b<<13) | (b>>19); + a += ((b & (c | d)) | (c & d)) + buffer[2] + SQRT_2; a = (a<<3 ) | (a>>29); d += ((a & (b | c)) | (b & c)) + buffer[6] + SQRT_2; d = (d<<5 ) | (d>>27); c += ((d & (a | b)) | (a & b)) + buffer[10] + SQRT_2; c = (c<<9 ) | (c>>23); b += ((c & (d | a)) | (d & a)) + buffer[14] + SQRT_2; b = (b<<13) | (b>>19); + a += ((b & (c | d)) | (c & d)) + buffer[3] + SQRT_2; a = (a<<3 ) | (a>>29); d += ((a & (b | c)) | (b & c)) + buffer[7] + SQRT_2; d = (d<<5 ) | (d>>27); c += ((d & (a | b)) | (a & b)) + buffer[11] + SQRT_2; c = (c<<9 ) | (c>>23); b += ((c & (d | a)) | (d & a)) + buffer[15] + SQRT_2; b = (b<<13) | (b>>19); // round 3 + a += (d ^ c ^ b) + buffer[0] + SQRT_3; a = (a << 3 ) | (a >> 29); d += (c ^ b ^ a) + buffer[8] + SQRT_3; d = (d << 9 ) | (d >> 23); c += (b ^ a ^ d) + buffer[4] + SQRT_3; c = (c << 11) | (c >> 21); b += (a ^ d ^ c) + buffer[12] + SQRT_3; b = (b << 15) | (b >> 17); + a += (d ^ c ^ b) + buffer[2] + SQRT_3; a = (a << 3 ) | (a >> 29); d += (c ^ b ^ a) + buffer[10] + SQRT_3; d = (d << 9 ) | (d >> 23); c += (b ^ a ^ d) + buffer[6] + SQRT_3; c = (c << 11) | (c >> 21); b += (a ^ d ^ c) + buffer[14] + SQRT_3; b = (b << 15) | (b >> 17); + a += (d ^ c ^ b) + buffer[1] + SQRT_3; a = (a << 3 ) | (a >> 29); d += (c ^ b ^ a) + buffer[9] + SQRT_3; d = (d << 9 ) | (d >> 23); c += (b ^ a ^ d) + buffer[5] + SQRT_3; c = (c << 11) | (c >> 21); b += (a ^ d ^ c) + buffer[13] + SQRT_3; b = (b << 15) | (b >> 17); + a += (d ^ c ^ b) + buffer[3] + SQRT_3; a = (a << 3 ) | (a >> 29); - d += (c ^ b ^ a) + buffer[11] + SQRT_3; d = (d << 9 ) | (d >> 23); c += (b ^ a ^ d) + buffer[7] + SQRT_3; c = (c << 11) | (c >> 21); b += (a ^ d ^ c) + buffer[15] + SQRT_3; b = (b << 15) | (b >> 17); + hash[0] = a + INIT_MD4_A; + hash[1] = b + INIT_MD4_B; + hash[2] = c + INIT_MD4_C; + hash[3] = d + INIT_MD4_D; } unsigned char *byte2hexstring(unsigned char * byte, unsigned int len) { - unsigned int i; - unsigned char *hexstring; + + unsigned int i; + + unsigned char *hexstring; - hexstring =(unsigned char*) malloc(len * 2 + 1); - memset(hexstring,0, 2 * len + 1); + hexstring =(unsigned char*) malloc(len * 2 + 1); + + memset(hexstring,0, 2 * len + 1); - for (i = 0; i < len; i++) - sprintf((char*)&hexstring[2 * i], "%02x", byte[i]); + for (i = 0; i < len; i++) + sprintf((char*)&hexstring[2 * i], "%02x", byte[i]); - return hexstring; + return hexstring; } -void PBKDF2_api(cl_uint *pass_api,cl_uint *salt_api,cl_uint saltlen_api,cl_uint *hash_out_api,int num) +void PBKDF2_api(cl_uint *pass_api,cl_uint *salt_api,cl_uint saltlen_api,cl_uint *hash_out_api,cl_uint num) { - cl_mem pass,salt,hash_out; - cl_event evnt; + cl_mem pass,salt,hash_out; + + cl_event evnt; - size_t N; + size_t N=num,M=64; - pass=clCreateBuffer(cntxt,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,4*num*sizeof(cl_uint),pass_api,&err); - if((pass==(cl_mem)0)) {printf("Create Buffer FAILED\n"); return;} - salt=clCreateBuffer(cntxt,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,MAX_SALT_LENGTH*sizeof(cl_uint)/2 +1,salt_api,&err); - if((salt==(cl_mem)0)) {printf("Create Buffer FAILED\n"); return;} + pass=clCreateBuffer(cntxt,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,4*num*sizeof(cl_uint),pass_api,&err); + if((pass==(cl_mem)0)) {printf("Create Buffer FAILED\n"); return;} + + salt=clCreateBuffer(cntxt,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,MAX_SALT_LENGTH*sizeof(cl_uint)/2 +1,salt_api,&err); + if((salt==(cl_mem)0)) {printf("Create Buffer FAILED\n"); return;} - hash_out=clCreateBuffer(cntxt,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,4*num*sizeof(cl_uint),hash_out_api,&err); - if((hash_out==(cl_mem)0)) {printf("Create Buffer FAILED\n"); return;} + hash_out=clCreateBuffer(cntxt,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,4*num*sizeof(cl_uint),hash_out_api,&err); + if((hash_out==(cl_mem)0)) {printf("Create Buffer FAILED\n"); return;} + if(clSetKernelArg(krnl0,0,sizeof(cl_mem),&pass)) {printf("Set Kernel FAILED.krnl0 arg0\n"); return;} - if(clSetKernelArg(krnl0,0,sizeof(cl_mem),&pass)) {printf("Set Kernel FAILED.krnl0 arg0\n"); return;} - if(clSetKernelArg(krnl0,1,sizeof(cl_mem),&salt)) {printf("Set Kernel FAILED.krnl0 arg1\n"); return;} - if(clSetKernelArg(krnl0,2,sizeof(cl_uint),&saltlen_api)) {printf("Set Kernel FAILED.krnl0 arg2\n"); return;} - if(clSetKernelArg(krnl0,3,sizeof(cl_mem),&hash_out)) {printf("Set Kernel FAILED.krnl0 arg3\n"); return;} - - N=num; - if(clEnqueueNDRangeKernel(cmdq,krnl0,1,NULL,&N,NULL,0,NULL,&evnt)) {printf("Enqueue Kernel FAILED.krnl0\n"); return;} - if(CL_SUCCESS!=clWaitForEvents(1,&evnt)) printf("SYNC FAILED\n"); - if(clEnqueueReadBuffer(cmdq,hash_out,CL_TRUE,0,4*num*sizeof(cl_uint),hash_out_api, 0, NULL, NULL)) {printf("Write Read FAILED\n"); return;} + if(clSetKernelArg(krnl0,1,sizeof(cl_mem),&salt)) {printf("Set Kernel FAILED.krnl0 arg1\n"); return;} + + if(clSetKernelArg(krnl0,2,sizeof(cl_uint),&saltlen_api)) {printf("Set Kernel FAILED.krnl0 arg2\n"); return;} + + if(clSetKernelArg(krnl0,3,sizeof(cl_uint),&num)) {printf("Set Kernel FAILED.krnl0 arg3\n"); return;} + + if(clSetKernelArg(krnl0,4,sizeof(cl_mem),&hash_out)) {printf("Set Kernel FAILED.krnl0 arg4\n"); return;} + + + if(clEnqueueNDRangeKernel(cmdq,krnl0,1,NULL,&N,&M,0,NULL,&evnt)) {printf("Enqueue Kernel FAILED.krnl0\n"); return;} + if(CL_SUCCESS!=clWaitForEvents(1,&evnt)) printf("SYNC FAILED\n"); + + + if(clEnqueueReadBuffer(cmdq,hash_out,CL_TRUE,0,4*num*sizeof(cl_uint),hash_out_api, 0, NULL, NULL)) {printf("Write Read FAILED\n"); return;} } void DCC(unsigned char *salt,unsigned char *username,unsigned int username_len,unsigned char *password,unsigned int *dcc_hash,unsigned int id) { - unsigned int i; - unsigned int buffer[16]; - unsigned int nt_hash[16]; - unsigned int password_len = strlen((const char*)password); + unsigned int i; + + unsigned int buffer[16]; + + unsigned int nt_hash[16]; + + unsigned int password_len = strlen((const char*)password); memset(nt_hash, 0, 64); - memset(buffer, 0, 64); + + memset(buffer, 0, 64); + // convert ASCII password to Unicode - for(i = 0; i < password_len >> 1; i++) - buffer[i] = password[2 * i] | (password[2 * i + 1] << 16); + for(i = 0; i < password_len >> 1; i++) + buffer[i] = password[2 * i] | (password[2 * i + 1] << 16); - // MD4 padding - if(password_len % 2 == 1) - buffer[i] = password[password_len - 1] | 0x800000; - else - buffer[i]=0x80; + + // MD4 padding + if(password_len % 2 == 1) + buffer[i] = password[password_len - 1] | 0x800000; + else + buffer[i]=0x80; + // put password length at end of buffer - buffer[14] = password_len << 4; + buffer[14] = password_len << 4; + // generate MD4 hash of the password (NT hash) - md4_crypt(buffer, nt_hash); + md4_crypt(buffer, nt_hash); + // concatenate NT hash and the username (salt) - memcpy((unsigned char *)nt_hash + 16, salt, username_len << 1); + memcpy((unsigned char *)nt_hash + 16, salt, username_len << 1); - i = username_len + 8; + + i = username_len + 8; + // MD4 padding - if(username_len % 2 == 1) - nt_hash[i >> 1] = username[username_len - 1] | 0x800000; - else - nt_hash[i >> 1] = 0x80; + if(username_len % 2 == 1) + nt_hash[i >> 1] = username[username_len - 1] | 0x800000; + else + nt_hash[i >> 1] = 0x80; + // put length at end of buffer - nt_hash[14] = i << 4; + nt_hash[14] = i << 4; - md4_crypt(nt_hash, (dcc_hash+4*id)); + + md4_crypt(nt_hash, (dcc_hash+4*id)); } + + static void init(struct fmt_main *pFmt) { - //Alocate memory for hashes and passwords - dcc_hash_host=(cl_uint*)malloc(4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); - dcc2_hash_host=(cl_uint*)malloc(4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); - memset(dcc_hash_host,0,4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); - memset(dcc2_hash_host,0,4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); - opencl_init("$JOHN/pbkdf2_kernel.cl", gpu_id, platform_id); - pltfrmid=platform[platform_id]; - devid[0]=devices[gpu_id]; - cntxt=context[gpu_id]; - cmdq=queue[gpu_id]; - prg=program[gpu_id]; - krnl0=clCreateKernel(prg,"PBKDF2",&err) ; - if(err) {printf("Create Kernel PBKDF2 FAILED\n"); return ;} + //Alocate memory for hashes and passwords + dcc_hash_host=(cl_uint*)malloc(4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); + + dcc_hash_host_temp=(cl_uint*)malloc(4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); + + dcc2_hash_host=(cl_uint*)malloc(4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); + + memset(dcc_hash_host,0,4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); + + memset(dcc2_hash_host,0,4*sizeof(cl_uint)*MAX_KEYS_PER_CRYPT); + + opencl_init("$JOHN/pbkdf2_kernel.cl", gpu_id, platform_id); + + pltfrmid=platform[platform_id]; + + devid[0]=devices[gpu_id]; + + cntxt=context[gpu_id]; + + cmdq=queue[gpu_id]; + + prg=program[gpu_id]; + + krnl0=clCreateKernel(prg,"PBKDF2",&err) ; + if(err) {printf("Create Kernel PBKDF2 FAILED\n"); return ;} } + + static int valid(char *ciphertext,struct fmt_main *pFmt) -{ char *hash; - int hashlength = 0; +{ + char *hash; + + int hashlength = 0; + if(strncmp(ciphertext, MSCASH2_PREFIX, strlen(MSCASH2_PREFIX)) != 0) return 0; + hash = strrchr(ciphertext, '#') + 1; + if (hash == NULL) - return 0; + return 0; - while (hash < ciphertext + strlen(ciphertext)) { - if (atoi16[ARCH_INDEX(*hash++)] == 0x7f) - return 0; - hashlength++; - } - if (hashlength != 32) - return 0; + + while (hash < ciphertext + strlen(ciphertext)) + { + if (atoi16[ARCH_INDEX(*hash++)] == 0x7f) + return 0; + + hashlength++; + } + + if (hashlength != 32) return 0; return 1; } + + static void *binary(char *ciphertext) { static unsigned int binary[4]; + int i; + char *hash ; - hash= strrchr(ciphertext, '#') + 1; + + hash= strrchr(ciphertext, '#') + 1; + if (hash == NULL) return binary; for (i = 0; i < 4; i++) { - sscanf(hash + (8 * i), "%08x", &binary[i]); - binary[i] = SWAP(binary[i]); + + sscanf(hash + (8 * i), "%08x", &binary[i]); + + binary[i] = SWAP(binary[i]); + } + return binary; } + + static void *salt(char *ciphertext) -{ static ms_cash2_salt salt; - unsigned int length; +{ + static ms_cash2_salt salt; + + unsigned int length; + char *pos ; + length=0; + pos=ciphertext + strlen(MSCASH2_PREFIX); + while (*pos != '#') - { if(length==MAX_SALT_LENGTH){return NULL;} - salt.username[length++] = *pos++; - } + { + if(length==MAX_SALT_LENGTH) + return NULL; + + salt.username[length++] = *pos++; + } + salt.username[length] = 0; + salt.length=length; + return &salt; } + + static void set_salt(void *salt) { memcpy(¤tsalt, salt, sizeof(ms_cash2_salt)); } + + static void set_key(char *key, int index) -{ int strlength,i; - strlength=strlen(key); +{ + int strlength,i; + + strlength=strlen(key); + for(i=0;i<=strlength;++i) - key_host[index][i]=key[i]; + key_host[index][i]=key[i]; } + + static char *get_key(int index ) -{ return (char *)key_host[index]; +{ + return (char *)key_host[index]; } + + static void crypt_all(int count) -{ unsigned int i; - unsigned char salt_unicode[MAX_SALT_LENGTH*2+1]; - cl_uint salt_host[MAX_SALT_LENGTH/2 +1]; - memset(salt_unicode,0,MAX_SALT_LENGTH*2+1); - memset(salt_host,0,(MAX_SALT_LENGTH/2 +1)*sizeof(cl_uint)); - if(currentsalt.length%2==1) - for(i = 0; i < (currentsalt.length >> 1) + 1; i++) - ((unsigned int *)salt_unicode)[i] = currentsalt.username[2 * i] | (currentsalt.username[2 * i + 1] << 16); - else - for(i = 0; i < (currentsalt.length >> 1) ; i++) - ((unsigned int *)salt_unicode)[i] = currentsalt.username[2 * i] | (currentsalt.username[2 * i + 1] << 16); +{ + unsigned int i,j,k; + + cl_uint temp[4]; + + if(count%64!=0) + count=(count/64 + 1)*64; + + current_numkeys=count; + + unsigned char salt_unicode[MAX_SALT_LENGTH*2+1]; + + cl_uint salt_host[MAX_SALT_LENGTH/2 +1]; + + memset(salt_unicode,0,MAX_SALT_LENGTH*2+1); + + memset(salt_host,0,(MAX_SALT_LENGTH/2 +1)*sizeof(cl_uint)); + + if(currentsalt.length%2==1) + + for(i = 0; i < (currentsalt.length >> 1) + 1; i++) + ((unsigned int *)salt_unicode)[i] = currentsalt.username[2 * i] | (currentsalt.username[2 * i + 1] << 16); + + else + + for(i = 0; i < (currentsalt.length >> 1) ; i++) + ((unsigned int *)salt_unicode)[i] = currentsalt.username[2 * i] | (currentsalt.username[2 * i + 1] << 16); for(i=0;i