diff -urpN john-1.7.6_jumbo7/run/nt_opencl_kernel.c john-1.7.6_ntopencl/run/nt_opencl_kernel.c --- john-1.7.6_jumbo7/run/nt_opencl_kernel.c 1970-01-01 00:00:00.000000000 +0000 +++ john-1.7.6_ntopencl/run/nt_opencl_kernel.c 2010-09-05 00:47:39.000000000 +0000 @@ -0,0 +1,111 @@ +//Init values +#define INIT_A 0x67452301 +#define INIT_B 0xefcdab89 +#define INIT_C 0x98badcfe +#define INIT_D 0x10325476 + +#define SQRT_2 0x5a827999 +#define SQRT_3 0x6ed9eba1 + +#define PLAINTEXT_LENGTH 27 + +__kernel void nt_crypt(const __global char *keys , __global uint *bbbs,__global uint *nt_buffer1x,__global uint *output1x) +{ + uint i = get_global_id(0); + __global char *key=keys+(i*32); + __global uint *nt_buffer=nt_buffer1x+i*16; + + //set ket + uint nt_index = 0; + uint md4_size = 0; + + for(;key[md4_size] && md4_size in 2007. No copyright + * Written by Alain Espinosa in 2010. No copyright * is claimed, and the software is hereby placed in the public domain. * In case this attempt to disclaim copyright and place the software in the * public domain is deemed null and void, then the software is - * Copyright (c) 2007 Alain Espinosa and it is hereby released to the + * Copyright (c) 2010 Alain Espinosa and it is hereby released to the * general public under the following terms: * * Redistribution and use in source and binary forms, with or without @@ -21,6 +21,11 @@ #include "memory.h" #include "common.h" #include "formats.h" +#include "path.h" +#ifdef NT_OPENCL + #include +#endif + //Init values #define INIT_A 0x67452301 @@ -52,7 +57,42 @@ static struct fmt_tests tests[] = { #define BINARY_SIZE 16 #define SALT_SIZE 0 -#if defined (NT_X86_64) +#ifdef NT_OPENCL + #define NT_NUM_KEYS 1024*16 + + //Putting here for succeful compilation (Needed by assembly functions). + //Maybe useful in the future perform CPU and GPU cracking side by side + unsigned int *nt_buffer8x; + unsigned int *output8x; + unsigned int *nt_buffer4x; + unsigned int *output4x; + unsigned int *nt_buffer1x; + unsigned int *output1x; + + static cl_uint bbbs[NT_NUM_KEYS]; + static char saved_plain[32*NT_NUM_KEYS]; + + cl_command_queue queue; + cl_kernel nt_crypt_kernel; + cl_mem buffer_nt; + cl_mem buffer_out; + cl_mem buffer_keys; + cl_mem buffer_bbbs; + #define ALGORITHM_NAME "OpenCL 1.0" + #define NT_CRYPT_FUN nt_crypt_all_opencl + //Putting in this function error logs can degrade performance + static void nt_crypt_all_opencl(int count) + { + size_t global_work_size = NT_NUM_KEYS; + + //If you are using CPU device comment the following line for a small speeud + clEnqueueWriteBuffer(queue, buffer_keys, CL_TRUE, 0, sizeof(saved_plain), (void*)saved_plain, 0, NULL, NULL); + clEnqueueNDRangeKernel( queue, nt_crypt_kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); + clFinish( queue ); + //If you are using CPU device comment the following line for a small speeud + clEnqueueReadBuffer(queue, buffer_bbbs, CL_TRUE, 0, sizeof(bbbs), (void*)bbbs, 0, NULL, NULL); + } +#elif defined (NT_X86_64) #define NT_NUM_KEYS 32 unsigned int nt_buffer8x[16*NT_NUM_KEYS] __attribute__ ((aligned(16))); @@ -158,24 +198,88 @@ static struct fmt_tests tests[] = { } #endif -static unsigned int last_i[NT_NUM_KEYS]; -static char saved_plain[32*NT_NUM_KEYS]; - #define MIN_KEYS_PER_CRYPT NT_NUM_KEYS #define MAX_KEYS_PER_CRYPT NT_NUM_KEYS -static void fmt_NT_init(void) -{ - memset(last_i,0,4*NT_NUM_KEYS); -#if defined(NT_X86_64) - memset(nt_buffer8x,0,16*4*NT_NUM_KEYS); -#elif defined(NT_SSE2) - memset(nt_buffer4x,0,64*4*NT_NUM_KEYS1); - memset(nt_buffer1x,0,16*4*NT_NUM_KEYS1); +#ifdef NT_OPENCL + static void if_error_log(cl_int ret_code, char* message) + { + if (ret_code != CL_SUCCESS) + printf("\nOpenCL: %s\n",message); + } + static void fmt_NT_init(void) + { + FILE* file = NULL; + char log[1024*64]; // Build log + char *source; // Device Program Source + cl_int ret_code; + cl_platform_id platform; + cl_device_id device; + cl_context context; + cl_program program; + + // Load the source + source = (char*)mem_alloc(1024*16); // Assume max 16KB of source + file = fopen(path_expand("$JOHN/nt_opencl_kernel.c"),"r"); if_error_log((file != NULL ? CL_SUCCESS : -1),"Source kernel not found"); + size_t source_size = fread(source,sizeof(char),1024*16,file); + source[source_size] = 0; //NUll terminate it + fclose(file); + + // 1. Get a platform. + ret_code = clGetPlatformIDs( 1, &platform, NULL ); if_error_log (ret_code,"No OpenCL platform exist"); + + // 2. Find an OpenCL device. + // Use CL_DEVICE_TYPE_CPU for a CPU device + // Use CL_DEVICE_TYPE_GPU for a Graphic Card + // Use CL_DEVICE_TYPE_ACCELERATOR if you have, for example, an IBM Cell Blade accelerator + ret_code = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); if_error_log (ret_code,"No OpenCL device of that type exist"); + + // 3. Create a context and command queue on that device. + context = clCreateContext( NULL, 1, &device, NULL, NULL, &ret_code); if_error_log (ret_code,"Error creating context"); + queue = clCreateCommandQueue( context, device, 0, &ret_code ); if_error_log (ret_code,"Error creating command queue"); + + // 4. Perform runtime source compilation, and obtain kernel entry point. + program = clCreateProgramWithSource( context, 1, &source, NULL, &ret_code ); if_error_log (ret_code,"Error creating program"); + ret_code = clBuildProgram( program, 1, &device, NULL, NULL, NULL ); if_error_log (ret_code,"Error building program. Log:"); + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log), (void*)log, NULL); if_error_log (ret_code,log); + + // 5. Create kernel to execute + nt_crypt_kernel = clCreateKernel( program, "nt_crypt", &ret_code ); if_error_log (ret_code,"Error creating kernel"); + + // 6. Create and set arguments + buffer_nt = clCreateBuffer( context, CL_MEM_READ_WRITE, 16*sizeof(cl_uint)*NT_NUM_KEYS, NULL , &ret_code ); if_error_log (ret_code,"Error creating nt_crypt buffer"); + buffer_out = clCreateBuffer( context, CL_MEM_READ_WRITE, 4*sizeof(cl_uint)*NT_NUM_KEYS , NULL , &ret_code ); if_error_log (ret_code,"Error creating output buffer"); + // If you are using a CPU device uncomment the following 2 lines for a small speedup + //buffer_keys = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(saved_plain) , saved_plain , &ret_code );if_error_log (ret_code,"Error creating keys buffer"); + //buffer_bbbs = clCreateBuffer( context, CL_MEM_WRITE_ONLY| CL_MEM_USE_HOST_PTR, sizeof(bbbs) , bbbs , &ret_code );if_error_log (ret_code,"Error creating bbbs buffer"); + // For a general device + buffer_keys = clCreateBuffer( context, CL_MEM_READ_ONLY , sizeof(saved_plain) , NULL , &ret_code ); if_error_log (ret_code,"Error creating keys buffer"); + buffer_bbbs = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof(bbbs) , NULL , &ret_code ); if_error_log (ret_code,"Error creating bbbs buffer"); + + ret_code = clSetKernelArg(nt_crypt_kernel, 0, sizeof(buffer_keys), (void*) &buffer_keys); if_error_log (ret_code,"Error setting argument 0"); + ret_code = clSetKernelArg(nt_crypt_kernel, 1, sizeof(buffer_bbbs), (void*) &buffer_bbbs); if_error_log (ret_code,"Error setting argument 1"); + ret_code = clSetKernelArg(nt_crypt_kernel, 2, sizeof(buffer_nt) , (void*) &buffer_nt); if_error_log (ret_code,"Error setting argument 2"); + ret_code = clSetKernelArg(nt_crypt_kernel, 3, sizeof(buffer_out) , (void*) &buffer_out); if_error_log (ret_code,"Error setting argument 3"); + + MEM_FREE(source); + } #else - memset(nt_buffer1x,0,16*4*NT_NUM_KEYS); + static unsigned int last_i[NT_NUM_KEYS]; + static char saved_plain[32*NT_NUM_KEYS]; + + static void fmt_NT_init(void) + { + memset(last_i,0,4*NT_NUM_KEYS); + #if defined(NT_X86_64) + memset(nt_buffer8x,0,16*4*NT_NUM_KEYS); + #elif defined(NT_SSE2) + memset(nt_buffer4x,0,64*4*NT_NUM_KEYS1); + memset(nt_buffer1x,0,16*4*NT_NUM_KEYS1); + #else + memset(nt_buffer1x,0,16*4*NT_NUM_KEYS); + #endif + } #endif -} static char * nt_split(char *ciphertext, int index) { @@ -266,7 +370,9 @@ static int binary_hash_2(void *binary) static int get_hash_0(int index) { -#if defined(NT_X86_64) +#ifdef NT_OPENCL + return bbbs[index] & 0x0F; +#elif defined(NT_X86_64) return output8x[32*(index>>3)+8+index%8] & 0x0F; #elif defined(NT_SSE2) if(index>3)+8+index%8] & 0xFF; #elif defined(NT_SSE2) if(index>3)+8+index%8] & 0x0FFF; #elif defined(NT_SSE2) if(index>3)+index%8; - - for(;key[md4_size] && md4_size>2)+index%4; + + unsigned int i=0; + unsigned int md4_size=0; + unsigned int saved_base=index<<5; + unsigned int temp; + int buff_base; + #if defined(NT_X86_64) + unsigned int last_length=last_i[index]<<2; + + buff_base=128*(index>>3)+index%8; - for(;key[md4_size] && md4_size>2)+index%4; + + for(;key[md4_size] && md4_size>=1; + + for(;i<=last_length;i++) + nt_buffer1x[i+buff_base]=0; + + last_i[index]=md4_size>>1; + + nt_buffer1x[14+buff_base] = md4_size << 4; + } + #else + buff_base=index<<4; for(;key[md4_size] && md4_size>=1; - - for(;i<=last_length;i++) - nt_buffer1x[i+buff_base]=0; + for(;i<=last_i[index];i++) + nt_buffer1x[buff_base+i]=0; last_i[index]=md4_size>>1; - nt_buffer1x[14+buff_base] = md4_size << 4; + nt_buffer1x[buff_base+14] = md4_size << 4; + #endif } -#else - buff_base=index<<4; - - for(;key[md4_size] && md4_size>1; - - nt_buffer1x[buff_base+14] = md4_size << 4; #endif -} static char *get_key(int index) {