diff -urpN john-1.7.6/run/sha1_opencl_kernel.cl john-1.7.6.new/run/sha1_opencl_kernel.cl --- john-1.7.6/run/sha1_opencl_kernel.cl 1970-01-01 01:00:00.000000000 +0100 +++ john-1.7.6.new/run/sha1_opencl_kernel.cl 2011-01-16 18:40:12.000000000 +0100 @@ -0,0 +1,202 @@ +/* + This code was taken and merged from pyrit opencl sha1 routines royger's sample ( http://royger.org/opencl/?p=12) + and largely inspired from md5_opencl_kernel.cl +*/ + +#define K0 0x5A827999 +#define K1 0x6ED9EBA1 +#define K2 0x8F1BBCDC +#define K3 0xCA62C1D6 + +#define H1 0x67452301 +#define H2 0xEFCDAB89 +#define H3 0x98BADCFE +#define H4 0x10325476 +#define H5 0xC3D2E1F0 + +#define SHA_BLOCK 16 +#define SSHA_NUM_KEYS 2 + +#ifndef uint32_t +#define uint32_t unsigned int +#endif + + +typedef struct { + uint32_t h0,h1,h2,h3,h4; +} SHA_DEV_CTX; + +void prepare_msg(__global uchar *s, char *dest) { + int i; + uint ulen; + + for(i = 0; i < SHA_BLOCK && s[i] != 0x80 ; i++){ + dest[i] = s[i]; + } + ulen = (i * 8) & 0xFFFFFFFF; + dest[i] = (char) 0x80; + i=i+1; + for(;i<60;i++){ + dest[i] = (char) 0; + } + dest[60] = ulen >> 24; + dest[61] = ulen >> 16; + dest[62] = ulen >> 8; + dest[63] = ulen; + + return; +} + +__kernel void sha1_crypt_kernel(__global const char *plain_key, __global SHA_DEV_CTX *digest){ + int t, word_pad, gid, msg_pad; + uint W[80], temp, A,B,C,D,E; + uchar msg[64]; + + gid = get_global_id(0); + word_pad = gid * 64; + msg_pad = gid * SHA_BLOCK; + + + A = H1; + B = H2; + C = H3; + D = H4; + E = H5; + + prepare_msg(&plain_key[msg_pad],msg); + + for (t = 0; t < 16; t++){ + W[t] = ((uchar) msg[ t * 4]) << 24; + W[t] |= ((uchar) msg[ t * 4 + 1]) << 16; + W[t] |= ((uchar) msg[ t * 4 + 2]) << 8; + W[t] |= (uchar) msg[ t * 4 + 3]; + } + +#undef R +#define R(t) \ +( \ + temp = W[(t - 3) & 0x0F] ^ W[(t - 8) & 0x0F] ^ \ + W[(t - 14) & 0x0F] ^ W[ t & 0x0F], \ + ( W[t & 0x0F] = rotate((int)temp,1) ) \ +) + +#undef P +#define P(a,b,c,d,e,x) \ +{ \ + e += rotate((int)a,5) + F(b,c,d) + K + x; b = rotate((int)b,30);\ +} + +#define F(x,y,z) (z ^ (x & (y ^ z))) +#define K 0x5A827999 + + P( A, B, C, D, E, W[0] ); + P( E, A, B, C, D, W[1] ); + P( D, E, A, B, C, W[2] ); + P( C, D, E, A, B, W[3] ); + P( B, C, D, E, A, W[4] ); + P( A, B, C, D, E, W[5] ); + P( E, A, B, C, D, W[6] ); + P( D, E, A, B, C, W[7] ); + P( C, D, E, A, B, W[8] ); + P( B, C, D, E, A, W[9] ); + P( A, B, C, D, E, W[10] ); + P( E, A, B, C, D, W[11] ); + P( D, E, A, B, C, W[12] ); + P( C, D, E, A, B, W[13] ); + P( B, C, D, E, A, W[14] ); + P( A, B, C, D, E, W[15] ); + P( E, A, B, C, D, R(16) ); + P( D, E, A, B, C, R(17) ); + P( C, D, E, A, B, R(18) ); + P( B, C, D, E, A, R(19) ); + +#undef K +#undef F + +#define F(x,y,z) (x ^ y ^ z) +#define K 0x6ED9EBA1 + + P( A, B, C, D, E, R(20) ); + P( E, A, B, C, D, R(21) ); + P( D, E, A, B, C, R(22) ); + P( C, D, E, A, B, R(23) ); + P( B, C, D, E, A, R(24) ); + P( A, B, C, D, E, R(25) ); + P( E, A, B, C, D, R(26) ); + P( D, E, A, B, C, R(27) ); + P( C, D, E, A, B, R(28) ); + P( B, C, D, E, A, R(29) ); + P( A, B, C, D, E, R(30) ); + P( E, A, B, C, D, R(31) ); + P( D, E, A, B, C, R(32) ); + P( C, D, E, A, B, R(33) ); + P( B, C, D, E, A, R(34) ); + P( A, B, C, D, E, R(35) ); + P( E, A, B, C, D, R(36) ); + P( D, E, A, B, C, R(37) ); + P( C, D, E, A, B, R(38) ); + P( B, C, D, E, A, R(39) ); + +#undef K +#undef F + +#define F(x,y,z) ((x & y) | (z & (x | y))) +#define K 0x8F1BBCDC + + P( A, B, C, D, E, R(40) ); + P( E, A, B, C, D, R(41) ); + P( D, E, A, B, C, R(42) ); + P( C, D, E, A, B, R(43) ); + P( B, C, D, E, A, R(44) ); + P( A, B, C, D, E, R(45) ); + P( E, A, B, C, D, R(46) ); + P( D, E, A, B, C, R(47) ); + P( C, D, E, A, B, R(48) ); + P( B, C, D, E, A, R(49) ); + P( A, B, C, D, E, R(50) ); + P( E, A, B, C, D, R(51) ); + P( D, E, A, B, C, R(52) ); + P( C, D, E, A, B, R(53) ); + P( B, C, D, E, A, R(54) ); + P( A, B, C, D, E, R(55) ); + P( E, A, B, C, D, R(56) ); + P( D, E, A, B, C, R(57) ); + P( C, D, E, A, B, R(58) ); + P( B, C, D, E, A, R(59) ); + +#undef K +#undef F + +#define F(x,y,z) (x ^ y ^ z) +#define K 0xCA62C1D6 + + P( A, B, C, D, E, R(60) ); + P( E, A, B, C, D, R(61) ); + P( D, E, A, B, C, R(62) ); + P( C, D, E, A, B, R(63) ); + P( B, C, D, E, A, R(64) ); + P( A, B, C, D, E, R(65) ); + P( E, A, B, C, D, R(66) ); + P( D, E, A, B, C, R(67) ); + P( C, D, E, A, B, R(68) ); + P( B, C, D, E, A, R(69) ); + P( A, B, C, D, E, R(70) ); + P( E, A, B, C, D, R(71) ); + P( D, E, A, B, C, R(72) ); + P( C, D, E, A, B, R(73) ); + P( B, C, D, E, A, R(74) ); + P( A, B, C, D, E, R(75) ); + P( E, A, B, C, D, R(76) ); + P( D, E, A, B, C, R(77) ); + P( C, D, E, A, B, R(78) ); + P( B, C, D, E, A, R(79) ); + +#undef K +#undef F + + digest[gid].h0 = as_uint(as_uchar4(A + H1).wzyx); + digest[gid].h1 = as_uint(as_uchar4(B + H2).wzyx); + digest[gid].h2 = as_uint(as_uchar4(C + H3).wzyx); + digest[gid].h3 = as_uint(as_uchar4(D + H4).wzyx); + digest[gid].h4 = as_uint(as_uchar4(E + H5).wzyx); +} diff -urpN john-1.7.6/src/Makefile john-1.7.6.new/src/Makefile --- john-1.7.6/src/Makefile 2011-01-16 17:13:42.000000000 +0100 +++ john-1.7.6.new/src/Makefile 2011-01-16 17:11:43.000000000 +0100 @@ -23,11 +23,11 @@ OMPFLAGS = #OMPFLAGS = -xopenmp # MD4, MD5 and OpenCL debugging #DEBUG = -DDEBUG -CFLAGS = -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG) -#CFLAGS = -g -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG) +#CFLAGS = -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG) +CFLAGS = -g -c -Wall -O2 -fomit-frame-pointer -I/usr/local/include $(OMPFLAGS) $(DEBUG) # -DHAVE_SKEY ASFLAGS = -c $(OMPFLAGS) -LDFLAGS = -L/usr/local/lib -L/usr/local/ssl/lib -lcrypto -lm $(OMPFLAGS) +LDFLAGS = -L/usr/local/lib -L/usr/local/ssl/lib -L$(ATISTREAMSDKROOT)/lib/x86_64 -lcrypto -lm $(OMPFLAGS) #LDFLAGS = -s -L/usr/local/lib -L/usr/local/ssl/lib -lcrypto -lm $(OMPFLAGS) # -lskey LDFLAGS_SOLARIS = -lrt -lnsl -lsocket @@ -62,6 +62,7 @@ JOHN_OBJS = \ hmacMD5_fmt.o \ IPB2_fmt.o \ rawSHA1_fmt.o \ + rawSHA1_opencl_fmt.o \ sha1_gen_fmt.o \ rawMD4_fmt.o \ md4_gen_fmt.o \ diff -urpN john-1.7.6/src/john.c john-1.7.6.new/src/john.c --- john-1.7.6/src/john.c 2011-01-16 17:13:42.000000000 +0100 +++ john-1.7.6.new/src/john.c 2011-01-16 17:12:03.000000000 +0100 @@ -47,6 +47,8 @@ extern struct fmt_main fmt_AFS, fmt_LM; extern struct fmt_main fmt_crypt; #endif +//extern struct fmt_main fmt_opencl_NSLDAPS; +extern struct fmt_main fmt_opencl_rawSHA1; extern struct fmt_main fmt_NT, fmt_XSHA; extern struct fmt_main fmt_PO; extern struct fmt_main fmt_rawMD5go; @@ -125,6 +127,8 @@ static void john_register_all(void) john_register_one(&fmt_AFS); john_register_one(&fmt_LM); + //john_register_one(&fmt_opencl_NSLDAPS); + john_register_one(&fmt_opencl_rawSHA1); john_register_one(&fmt_NT); john_register_one(&fmt_XSHA); john_register_one(&fmt_mscash); diff -urpN john-1.7.6/src/rawSHA1_opencl_fmt.c john-1.7.6.new/src/rawSHA1_opencl_fmt.c --- john-1.7.6/src/rawSHA1_opencl_fmt.c 1970-01-01 01:00:00.000000000 +0100 +++ john-1.7.6.new/src/rawSHA1_opencl_fmt.c 2011-01-16 18:39:59.000000000 +0100 @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2011 Samuele Giovanni Tonon + * samu at linuxasylum dot net + * Released under GPL license + */ + +#include + +#include "path.h" +#include "arch.h" +#include "misc.h" +#include "common.h" +#include "formats.h" +#include "sha.h" + +#define FORMAT_LABEL "raw-sha1-opencl" +#define FORMAT_NAME "Raw SHA-1 OpenCL" +#define ALGORITHM_NAME "raw-sha1-opencl" +#define SHA_TYPE "SHA-1" +#define BENCHMARK_COMMENT "" +#define BENCHMARK_LENGTH 0 + +#define PLAINTEXT_LENGTH 32 +#define CIPHERTEXT_LENGTH 40 + +#define BINARY_SIZE 20 +#define SALT_SIZE 0 + + +#define SHA_BLOCK 16 +#define SSHA_NUM_KEYS 1024*2048 + +#define MIN_KEYS_PER_CRYPT SSHA_NUM_KEYS +#define MAX_KEYS_PER_CRYPT SSHA_NUM_KEYS + +#ifndef uint32_t +#define uint32_t unsigned int +#endif + +typedef struct { + uint32_t h0,h1,h2,h3,h4; +} SHA_DEV_CTX; + + +cl_platform_id platform; +cl_device_id devices; +cl_context context; +cl_program program; +cl_command_queue queue; +cl_int ret_code; +cl_kernel sha1_crypt_kernel; +cl_mem pinned_saved_keys, pinned_partial_hashes, buffer_out, buffer_keys, buffer_hash, len_buffer; +static SHA_DEV_CTX *outbuffer; +static char *inbuffer; +static size_t global_work_size = SSHA_NUM_KEYS; +static size_t local_work_size = 256; +//static size_t local_work_size = 1; + +static struct fmt_tests rawsha1_tests[] = { + {"a9993e364706816aba3e25717850c26c9cd0d89d", "abc"}, + {"2fbf0eba37de1d1d633bc1ed943b907f9b360d4c", "azertyuiop1"}, + {"f879f8090e92232ed07092ebed6dc6170457a21d", "azertyuiop2"}, + {"1813c12f25e64931f3833b26e999e26e81f9ad24", "azertyuiop3"}, + {NULL} +}; + +static char saved_key[SSHA_NUM_KEYS][PLAINTEXT_LENGTH]; + +static void if_error_log(cl_int ret_code, const char *message) +{ + if(ret_code != CL_SUCCESS) { + printf("\nOpenCL: %s\n", message); + exit(-1); + } +} + +static int valid(char *ciphertext) +{ + int i; + + if (strlen(ciphertext) != CIPHERTEXT_LENGTH) return 0; + for (i = 0; i < CIPHERTEXT_LENGTH; i++){ + if (!( (('0' <= ciphertext[i])&&(ciphertext[i] <= '9')) || + (('a' <= ciphertext[i])&&(ciphertext[i] <= 'f')) + || (('A' <= ciphertext[i])&&(ciphertext[i] <= 'F')))) + return 0; + } + return 1; +} + +static void rawsha1_set_salt(void *salt) { } + +static void rawsha1_opencl_init(void) +{ + // load kernel source + char *source=(char*)mem_alloc(1024*16); + FILE *fp = fopen(path_expand("$JOHN/sha1_opencl_kernel.cl"),"r"); + if(!fp) + if_error_log(!CL_SUCCESS, "Source kernel not found!"); + size_t source_size = fread(source, sizeof(char), 1024*16, fp); + source[source_size] = 0; + fclose(fp); + + // get a platform and its information + size_t max_group_size; + char log[1024*64]; + ret_code = clGetPlatformIDs(1, &platform, NULL); + if_error_log(ret_code, "No OpenCL platform exist"); + ret_code = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(log), log, NULL); + if_error_log(ret_code, "Error querying PLATFORM_NAME"); + printf("\nOpenCL Platform: <<<%s>>>", log); + + // find an OpenCL device + //ret_code = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &devices, NULL); + ret_code = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &devices, NULL); + if_error_log(ret_code, "No OpenCL device of that type exist"); + ret_code = clGetDeviceInfo(devices, CL_DEVICE_NAME, sizeof(log), log, NULL); + if_error_log(ret_code, "Error querying DEVICE_NAME"); + printf(" and device: <<<%s>>>\n",log); + ret_code = clGetDeviceInfo(devices, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_group_size), &max_group_size, NULL); + if_error_log(ret_code, "Error querying MAX_WORK_GROUP_SIZE"); + + // create a context and command queue on the device. + context = clCreateContext(NULL, 1, &devices, NULL, NULL, &ret_code); + if_error_log(ret_code, "Error creating context"); + queue = clCreateCommandQueue(context, devices, 0, &ret_code); + if_error_log(ret_code, "Error creating command queue"); + + // submit the kernel source for compilation + program = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &ret_code); + if_error_log(ret_code,"Error creating program"); + ret_code = clBuildProgram(program, 1, &devices, NULL, NULL, NULL); + if(ret_code != CL_SUCCESS) { + printf("failed in clBuildProgram with %d\n", ret_code); + clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, sizeof(log), (void*)log, NULL); + printf("compilation log: %s\n", log); + exit(-1); + } + + // create kernel to execute + sha1_crypt_kernel = clCreateKernel(program, "sha1_crypt_kernel", &ret_code); + if_error_log(ret_code, "Error creating kernel. Double-check kernel name?"); + + // create Page-Locked (Pinned) memory for higher bandwidth between host and device (Nvidia Best Practices) + pinned_saved_keys = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, (SHA_BLOCK)*SSHA_NUM_KEYS, NULL, &ret_code); + if_error_log (ret_code, "Error creating page-locked memory"); + inbuffer = (char*)clEnqueueMapBuffer(queue, pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, (SHA_BLOCK)*SSHA_NUM_KEYS, 0, NULL, NULL, &ret_code); + if_error_log (ret_code, "Error mapping page-locked memory inbuffer"); + + memset(inbuffer,0,SHA_BLOCK*SSHA_NUM_KEYS); + + + pinned_partial_hashes = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, NULL, &ret_code); + if_error_log (ret_code, "Error creating page-locked memory"); + + outbuffer = (SHA_DEV_CTX *)clEnqueueMapBuffer(queue, pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, 0, NULL, NULL, &ret_code); + if_error_log (ret_code, "Error mapping page-locked memory outbuffer"); + + // create and set arguments + buffer_keys = clCreateBuffer(context, CL_MEM_READ_ONLY, (SHA_BLOCK)*SSHA_NUM_KEYS, NULL, &ret_code); + if_error_log (ret_code, "Error creating buffer keys argument"); + + buffer_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(SHA_DEV_CTX)*SSHA_NUM_KEYS, NULL, &ret_code); + if_error_log (ret_code,"Error creating buffer out argument"); + + ret_code = clSetKernelArg(sha1_crypt_kernel, 0, sizeof(buffer_keys), (void*) &buffer_keys); + if_error_log (ret_code, "Error setting argument 1"); + + ret_code = clSetKernelArg(sha1_crypt_kernel, 1, sizeof(buffer_out ), (void*) &buffer_out); + if_error_log (ret_code, "Error setting argument 3"); +} + +static void rawsha1_set_key(char *key, int index) { + int lenpwd; + + memset(saved_key[index],0,PLAINTEXT_LENGTH); + + strnzcpy(saved_key[index], key, PLAINTEXT_LENGTH); + lenpwd = strlen(saved_key[index]); + + memcpy(&(inbuffer[index*SHA_BLOCK]),saved_key[index],SHA_BLOCK); + inbuffer[index*SHA_BLOCK+lenpwd] = 0x80; + //printf("key=%s index=%d\n",saved_key[index],index); +} + +static char *rawsha1_get_key(int index) { + return saved_key[index]; +} + +static int rawsha1_cmp_all(void *binary, int count) { + unsigned int i = 0; + unsigned int b = ((unsigned int *)binary)[0]; + + for(; i