>From bd5944d4e115c08865b556183dca61b678899ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claudio=20Andr=C3=A9?= Date: Sat, 10 Mar 2012 16:04:42 -0300 Subject: [PATCH] OpenCL Crypt 512 (development) --- src/Makefile | 4 +- src/common-opencl.c | 11 +- src/john.c | 2 + src/opencl/cryptsha512_kernel.cl | 346 +++++++++++++++++++++++ src/opencl_cryptsha512.h | 79 ++++++ src/opencl_cryptsha512_fmt.c | 564 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 1004 insertions(+), 2 deletions(-) create mode 100644 src/opencl/cryptsha512_kernel.cl create mode 100644 src/opencl_cryptsha512.h create mode 100644 src/opencl_cryptsha512_fmt.c diff --git a/src/Makefile b/src/Makefile index 388f4f0..2e5da72 100644 --- a/src/Makefile +++ b/src/Makefile @@ -119,7 +119,8 @@ JOHN_OBJS = \ OCL_OBJS = \ common-opencl.o opencl_mysqlsha1_fmt.o \ opencl_cryptmd5_fmt.o opencl_phpass_fmt.o opencl_rawsha1_fmt.o \ - opencl_nt_fmt.o opencl_rawmd5_fmt.o opencl_nsldaps_fmt.o + opencl_nt_fmt.o opencl_rawmd5_fmt.o opencl_nsldaps_fmt.o \ + opencl_cryptsha512_fmt.o CUDA_OBJS = \ cuda_common.o \ @@ -318,6 +319,7 @@ linux-x86-64-opencl: CFLAGS="$(CFLAGS) -I$(OCLROOT)/include -I$(OCLROOT)/include -DHAVE_CRYPT -DCL_VERSION_1_0 -DHAVE_DL" \ LDFLAGS="$(LDFLAGS) -L$(OCLROOT)/lib/x86_64 -L$(OCLROOT)/lib64 -lcrypt -lOpenCL -ldl" $(CP) opencl/*.cl ../run/ + $(CP) opencl_cryptsha512.h ../run/ linux-x86-64-cuda: $(LN) x86-64.h arch.h diff --git a/src/common-opencl.c b/src/common-opencl.c index 02f2d6e..96dfec2 100644 --- a/src/common-opencl.c +++ b/src/common-opencl.c @@ -83,6 +83,14 @@ static void dev_init(unsigned int dev_id, unsigned int platform_id) HANDLE_CLERROR(ret_code, "Error creating command queue"); } +static char * include_source(char *pathname) +{ + static char include[PATH_BUFFER_SIZE]; + sprintf(include, "-I %s", path_expand(pathname)); + + return include; +} + static void build_kernel(int dev_id) { @@ -94,7 +102,8 @@ static void build_kernel(int dev_id) HANDLE_CLERROR(ret_code, "Error while creating program"); cl_int build_code; - build_code = clBuildProgram(program[dev_id], 0, NULL, "", NULL, NULL); + build_code = clBuildProgram(program[dev_id], 0, NULL, + include_source("$JOHN/"), NULL, NULL); HANDLE_CLERROR(clGetProgramBuildInfo(program[dev_id], devices[dev_id], CL_PROGRAM_BUILD_LOG, sizeof(opencl_log), (void *) opencl_log, diff --git a/src/john.c b/src/john.c index c18c13c..bb59bd4 100644 --- a/src/john.c +++ b/src/john.c @@ -110,6 +110,7 @@ extern struct fmt_main fmt_opencl_rawSHA1; extern struct fmt_main fmt_opencl_cryptMD5; extern struct fmt_main fmt_opencl_phpass; extern struct fmt_main fmt_opencl_mysqlsha1; +extern struct fmt_main fmt_opencl_cryptsha512; #endif #ifdef HAVE_CUDA extern struct fmt_main fmt_cuda_cryptmd5; @@ -223,6 +224,7 @@ static void john_register_all(void) john_register_one(&fmt_opencl_cryptMD5); john_register_one(&fmt_opencl_phpass); john_register_one(&fmt_opencl_mysqlsha1); + john_register_one(&fmt_opencl_cryptsha512); #endif #ifdef HAVE_CUDA diff --git a/src/opencl/cryptsha512_kernel.cl b/src/opencl/cryptsha512_kernel.cl new file mode 100644 index 0000000..6989267 --- /dev/null +++ b/src/opencl/cryptsha512_kernel.cl @@ -0,0 +1,346 @@ +/* +* This software is Copyright (c) 2011 Lukas Odzioba +* and it is hereby released to the general public under the following terms: +* Redistribution and use in source and binary forms, with or without modification, are permitted. +*/ + +#include "opencl_cryptsha512.h" +//#pragma OPENCL EXTENSION cl_amd_printf : enable + +__constant uint64_t k[] = { + 0x428a2f98d728ae22LL, 0x7137449123ef65cdLL, 0xb5c0fbcfec4d3b2fLL, + 0xe9b5dba58189dbbcLL, + 0x3956c25bf348b538LL, 0x59f111f1b605d019LL, 0x923f82a4af194f9bLL, + 0xab1c5ed5da6d8118LL, + 0xd807aa98a3030242LL, 0x12835b0145706fbeLL, 0x243185be4ee4b28cLL, + 0x550c7dc3d5ffb4e2LL, + 0x72be5d74f27b896fLL, 0x80deb1fe3b1696b1LL, 0x9bdc06a725c71235LL, + 0xc19bf174cf692694LL, + 0xe49b69c19ef14ad2LL, 0xefbe4786384f25e3LL, 0x0fc19dc68b8cd5b5LL, + 0x240ca1cc77ac9c65LL, + 0x2de92c6f592b0275LL, 0x4a7484aa6ea6e483LL, 0x5cb0a9dcbd41fbd4LL, + 0x76f988da831153b5LL, + 0x983e5152ee66dfabLL, 0xa831c66d2db43210LL, 0xb00327c898fb213fLL, + 0xbf597fc7beef0ee4LL, + 0xc6e00bf33da88fc2LL, 0xd5a79147930aa725LL, 0x06ca6351e003826fLL, + 0x142929670a0e6e70LL, + 0x27b70a8546d22ffcLL, 0x2e1b21385c26c926LL, 0x4d2c6dfc5ac42aedLL, + 0x53380d139d95b3dfLL, + 0x650a73548baf63deLL, 0x766a0abb3c77b2a8LL, 0x81c2c92e47edaee6LL, + 0x92722c851482353bLL, + 0xa2bfe8a14cf10364LL, 0xa81a664bbc423001LL, 0xc24b8b70d0f89791LL, + 0xc76c51a30654be30LL, + 0xd192e819d6ef5218LL, 0xd69906245565a910LL, 0xf40e35855771202aLL, + 0x106aa07032bbd1b8LL, + 0x19a4c116b8d2d0c8LL, 0x1e376c085141ab53LL, 0x2748774cdf8eeb99LL, + 0x34b0bcb5e19b48a8LL, + 0x391c0cb3c5c95a63LL, 0x4ed8aa4ae3418acbLL, 0x5b9cca4f7763e373LL, + 0x682e6ff3d6b2b8a3LL, + 0x748f82ee5defb2fcLL, 0x78a5636f43172f60LL, 0x84c87814a1f0ab72LL, + 0x8cc702081a6439ecLL, + 0x90befffa23631e28LL, 0xa4506cebde82bde9LL, 0xbef9a3f7b2c67915LL, + 0xc67178f2e372532bLL, + 0xca273eceea26619cLL, 0xd186b8c721c0c207LL, 0xeada7dd6cde0eb1eLL, + 0xf57d4f7fee6ed178LL, + 0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, 0x113f9804bef90daeLL, + 0x1b710b35131c471bLL, + 0x28db77f523047d84LL, 0x32caab7b40c72493LL, 0x3c9ebe0a15c9bebcLL, + 0x431d67c49c100d4cLL, + 0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL, + 0x6c44198c4a475817LL, +}; + +void init_ctx(sha512_ctx * ctx) { + ctx->H[0] = 0x6a09e667f3bcc908LL; + ctx->H[1] = 0xbb67ae8584caa73bLL; + ctx->H[2] = 0x3c6ef372fe94f82bLL; + ctx->H[3] = 0xa54ff53a5f1d36f1LL; + ctx->H[4] = 0x510e527fade682d1LL; + ctx->H[5] = 0x9b05688c2b3e6c1fLL; + ctx->H[6] = 0x1f83d9abfb41bd6bLL; + ctx->H[7] = 0x5be0cd19137e2179LL; + ctx->total = 0; + ctx->buflen = 0; +} + +void memcpy_1(uint8_t * dest, const uint8_t * src, const size_t n) { + for (int i = 0; i < n; i++) + dest[i] = src[i]; +} + +void memcpy (uint8_t * dest, buffer_64 * src, const size_t n) { + for (int i = 0; i < n; i++) + dest[i] = src->mem_08[i]; +} + +void depura(buffer_64 * ctx) +{/* + printf("\nCtx: "); + for(int i=0;i<8;i++) + printf("%08x ",ctx[i]);*/ +} + +void debug_ctx(sha512_ctx * ctx) +{/* + printf("\nCtx: "); + int i; + for(i=0;i<8;i++) + printf("%08x ",ctx->H[i]); + printf(" <%03d>",ctx->total); + printf(" <%03d>",ctx->buflen);*/ +} + +void insert_to_buffer(sha512_ctx * ctx, const uint8_t * string, + const uint8_t len) { + uint8_t *d = ctx->buffer->mem_08 + ctx->buflen; //Position ctx->buflen (in char size) + memcpy_1(d, string, len); + ctx->buflen += len; +} + +void sha512_block(sha512_ctx * ctx) { + int i; + uint64_t a = ctx->H[0]; + uint64_t b = ctx->H[1]; + uint64_t c = ctx->H[2]; + uint64_t d = ctx->H[3]; + uint64_t e = ctx->H[4]; + uint64_t f = ctx->H[5]; + uint64_t g = ctx->H[6]; + uint64_t h = ctx->H[7]; + + uint64_t w[16]; + + uint64_t *data = ctx->buffer->mem_64; //The same as buffer[0] + //#pragma unroll 16 + for (i = 0; i < 16; i++) + w[i] = SWAP64(data[i]); + + uint64_t t1, t2; + //#pragma unroll 16 + for (i = 0; i < 16; i++) { + + + t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g); + t2 = Maj(a, b, c) + Sigma0(a); + + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + + } + + + for (i = 16; i < 80; i++) { + + + w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15]; + t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g); + t2 = Maj(a, b, c) + Sigma0(a); + + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + + } + + ctx->H[0] += a; + ctx->H[1] += b; + ctx->H[2] += c; + ctx->H[3] += d; + ctx->H[4] += e; + ctx->H[5] += f; + ctx->H[6] += g; + ctx->H[7] += h; +} + +void ctx_append_1(sha512_ctx * ctx) { + uint32_t length = ctx->buflen; + int i = 127 - length; + uint8_t *d = ctx->buffer->mem_08 + length; + *d++ = 0x80; + + while (i--) { + d[i] = 0; + } +} + +void ctx_add_length(sha512_ctx * ctx) { + uint64_t *blocks = ctx->buffer->mem_64; + blocks[15] = SWAP64((uint64_t) (ctx->total * 8)); +} + +void finish_ctx(sha512_ctx * ctx) { + ctx_append_1(ctx); + ctx_add_length(ctx); + ctx->buflen = 0; +} + +void ctx_update(sha512_ctx * ctx, uint8_t *string, uint8_t len) { + ctx->total += len; + uint8_t startpos = ctx->buflen; + uint8_t partsize; + if (startpos + len <= 128) { + partsize = len; + } else + partsize = 128 - startpos; + + insert_to_buffer(ctx, string, partsize); + if (ctx->buflen == 128) { + uint8_t offset = 128 - startpos; + sha512_block(ctx); + ctx->buflen = 0; + insert_to_buffer(ctx, (string + offset), len - offset); + } +} + +void clear_ctx_buffer(sha512_ctx * ctx) { + + uint32_t *w = ctx->buffer->mem_32; + //#pragma unroll 30 + for (int i = 0; i < 30; i++) //TODO: why 30? Not 32? + w[i] = 0; + + ctx->buflen = 0; +} + +void sha512_digest(sha512_ctx * ctx, uint64_t * result) { + uint8_t i; + if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block + finish_ctx(ctx); + sha512_block(ctx); + } else { + uint8_t moved = 1; + if (ctx->buflen < 128) { //data and 0x80 fits in one block + ctx_append_1(ctx); + moved = 0; + } + sha512_block(ctx); + clear_ctx_buffer(ctx); + if (moved) + ctx->buffer->mem_08[0] = 0x80; //append 1,the rest is already clean + ctx_add_length(ctx); + sha512_block(ctx); + } + //#pragma unroll 8 + for (i = 0; i < 8; i++) + result[i] = SWAP64(ctx->H[i]); +} + +void sha512crypt(uint8_t *pass, uint8_t passlength, + crypt_sha512_salt cuda_salt, + __global crypt_sha512_hash * output) { + + buffer_64 alt_result[8], temp_result[8]; + int i; + sha512_ctx ctx; + init_ctx(&ctx); + + ctx_update(&ctx, pass, passlength); + ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen); + ctx_update(&ctx, pass, passlength); + + sha512_digest(&ctx, alt_result->mem_64); + init_ctx(&ctx); +depura(alt_result); ;//TODO: #################33 + ctx_update(&ctx, pass, passlength); + ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen); + ctx_update(&ctx, alt_result->mem_08, passlength); + +depura(alt_result); ;//TODO: #################33 + + for (i = passlength; i > 0; i >>= 1) { + if ((i & 1) != 0) + ctx_update(&ctx, alt_result->mem_08, 64); + else + ctx_update(&ctx, pass, passlength); + } + sha512_digest(&ctx, alt_result->mem_64); + + + init_ctx(&ctx); + for (i = 0; i < passlength; i++) + ctx_update(&ctx, pass, passlength); + + sha512_digest(&ctx, temp_result->mem_64); + + uint8_t sp_sequence[16 + 4]; + uint8_t *p_sequence = sp_sequence; + memcpy(p_sequence, temp_result, passlength); + + init_ctx(&ctx); + + /* For every character in the password add the entire password. */ + for (i = 0; i < 16 + (alt_result->mem_08)[0]; i++) //Analyse, TÁ CERTO?### + ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen); + + /* Finish the digest. */ + sha512_digest(&ctx, temp_result->mem_64); + + uint8_t saltlength = cuda_salt.saltlen; + + uint8_t ss_sequence[16 + 4]; + uint8_t *s_sequence = ss_sequence; + memcpy(s_sequence, temp_result, saltlength); + + /* Repeatedly run the collected hash value through SHA512 to + burn CPU cycles. */ + for (i = 0; i < cuda_salt.rounds; i++) { + init_ctx(&ctx); + + if ((i & 1) != 0) + ctx_update(&ctx, p_sequence, passlength); + else + ctx_update(&ctx, alt_result->mem_08, 64); + + if ((i % 3) != 0) + ctx_update(&ctx, s_sequence, saltlength); + + if ((i % 7) != 0) + ctx_update(&ctx, p_sequence, passlength); + + if ((i & 1) != 0) + ctx_update(&ctx, alt_result->mem_08, 64); + else + ctx_update(&ctx, p_sequence, passlength); + + sha512_digest(&ctx, alt_result->mem_64); + } + //#pragma unroll 8 + for (i = 0; i < 8; i++) + output->v[i] = alt_result[i].mem_64[0]; +} + +__kernel void kernel_crypt(__constant crypt_sha512_salt * hsalt, + __constant crypt_sha512_password * inbuffer, + __global crypt_sha512_hash * outbuffer) { + + uint8_t pass[PLAINTEXT_LENGTH]; + crypt_sha512_salt salt_data; + + //Get the task to be done + uint32_t idx = get_global_id(0); + + //Get password information, put in faster memory. + for (int i = 0; i < inbuffer[idx].length; i++) + pass[i] = inbuffer[idx].v[i]; + + //Get salt information, put in faster memory. + salt_data.saltlen = hsalt->saltlen; + salt_data.rounds = hsalt->rounds; + + for (int i = 0; i < salt_data.saltlen; i++) + salt_data.salt[i] = hsalt->salt[i]; + + //Do the job + sha512crypt(pass, inbuffer[idx].length, salt_data, &outbuffer[idx]); +} diff --git a/src/opencl_cryptsha512.h b/src/opencl_cryptsha512.h new file mode 100644 index 0000000..f8f1852 --- /dev/null +++ b/src/opencl_cryptsha512.h @@ -0,0 +1,79 @@ +/* +* This software is Copyright (c) 2011 Lukas Odzioba +* and it is hereby released to the general public under the following terms: +* Redistribution and use in source and binary forms, with or without modification, are permitted. +*/ +#ifndef _CRYPTSHA512_H +#define _CRYPTSHA512_H + +//Type names definition. ///TODO: move to a new file and share this new file where needed. +#define uint8_t unsigned char +#define uint16_t unsigned short +#define uint32_t unsigned int +#define uint64_t ulong //Tip: unsigned long long int failed on compile (AMD). + +//Functions. +#define MAX(x,y) ((x) > (y) ? (x) : (y)) +#define MIN(x,y) ((x) < (y) ? (x) : (y)) + +#define ROUNDS_DEFAULT 5000 +#define ROUNDS_MIN 1000 +#define ROUNDS_MAX 999999999 + +#define SALT_SIZE 16 +#define PLAINTEXT_LENGTH 16 +#define KEYS_PER_CRYPT 1024*2048 + +#define rol(x,n) ((x << n) | (x >> (64-n))) +#define ror(x,n) ((x >> n) | (x << (64-n))) +#define Ch(x,y,z) ((x & y) ^ ( (~x) & z)) +#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z)) +#define Sigma0(x) ((ror(x,28)) ^ (ror(x,34)) ^ (ror(x,39))) +#define Sigma1(x) ((ror(x,14)) ^ (ror(x,18)) ^ (ror(x,41))) +#define sigma0(x) ((ror(x,1)) ^ (ror(x,8)) ^ (x>>7)) +#define sigma1(x) ((ror(x,19)) ^ (ror(x,61)) ^ (x>>6)) + +# define SWAP32(n) \ + (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) + +# define SWAP64(n) \ + (((n) << 56) \ + | (((n) & 0xff00) << 40) \ + | (((n) & 0xff0000) << 24) \ + | (((n) & 0xff000000) << 8) \ + | (((n) >> 8) & 0xff000000) \ + | (((n) >> 24) & 0xff0000) \ + | (((n) >> 40) & 0xff00) \ + | ((n) >> 56)) + +//Data types. +typedef union { + uint8_t mem_08[8]; + uint16_t mem_16[4]; + uint32_t mem_32[2]; + uint64_t mem_64[1]; +} buffer_64; + +typedef struct { + uint64_t H[8]; //512 bits + uint32_t total; + uint32_t buflen; + buffer_64 buffer[16]; //1024bits +} sha512_ctx; + +typedef struct { + uint32_t rounds; + uint8_t saltlen; + uint8_t salt[SALT_SIZE]; +} crypt_sha512_salt; + +typedef struct { + uint8_t length; + uint8_t v[PLAINTEXT_LENGTH]; +} crypt_sha512_password; + +typedef struct { + uint64_t v[8]; //512 bits +} crypt_sha512_hash; + +#endif \ No newline at end of file diff --git a/src/opencl_cryptsha512_fmt.c b/src/opencl_cryptsha512_fmt.c new file mode 100644 index 0000000..0b847c3 --- /dev/null +++ b/src/opencl_cryptsha512_fmt.c @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2011 Samuele Giovanni Tonon + * samu at linuxasylum dot net + * This program comes with ABSOLUTELY NO WARRANTY; express or + * implied . + * This is free software, and you are welcome to redistribute it + * under certain conditions; as expressed here + * http://www.gnu.org/licenses/gpl-2.0.html + */ + +#include +#include "common-opencl.h" +#include "opencl_cryptsha512.h" + + +#define FORMAT_LABEL "cryptsha512-opencl" +#define FORMAT_NAME "crypt SHA-512 OpenCL" + +#if ARCH_BITS >= 64 +#define ALGORITHM_NAME "OpenSSL 64/" ARCH_BITS_STR +#else +#define ALGORITHM_NAME "OpenSSL 32/" ARCH_BITS_STR +#endif + +#define BENCHMARK_COMMENT " rounds=5000" +#define BENCHMARK_LENGTH -1 + +#define BINARY_SIZE (3+16+86) ///TODO: Magic number? + +#define MIN_KEYS_PER_CRYPT 1024 +#define MAX_KEYS_PER_CRYPT KEYS_PER_CRYPT + +static crypt_sha512_password *plaintext; // plaintext ciphertexts +static crypt_sha512_hash *out_hashes; // calculated hashes +static crypt_sha512_salt salt_data; + +cl_mem salt_info; //Salt information. +cl_mem buffer_in; //Plaintext buffer. +cl_mem buffer_out; //Hash keys (output) +cl_mem pinned_saved_keys, pinned_partial_hashes; + +cl_command_queue queue_prof; +cl_kernel crypt_kernel; + +static size_t max_keys_per_crypt = KEYS_PER_CRYPT; + +static struct fmt_tests tests[] = { + {"$6$saltstring$svn8UoSVapNtMuq1ukKS4tPQd8iKwSMHWjl/O817G3uBnIFNjnQJuesI68u4OTLiBFdcbYEdFCoEOfaS35inz1", "Hello world!"}, + {"$6$LKO/Ute40T3FNF95$6S/6T2YuOIHY0N3XpLKABJ3soYcXD9mB7uVbtEZDj/LNscVhZoZ9DEH.sBciDrMsHOWOoASbNLTypH/5X26gN0", "U*U*U*U*"}, + {"$6$LKO/Ute40T3FNF95$wK80cNqkiAUzFuVGxW6eFe8J.fSVI65MD5yEm8EjYMaJuDrhwe5XXpHDJpwF/kY.afsUs1LlgQAaOapVNbggZ1", "U*U***U"}, + {"$6$OmBOuxFYBZCYAadG$WCckkSZok9xhp4U1shIZEV7CCVwQUwMVea7L3A77th6SaE9jOPupEMJB.z0vIWCDiN9WLh2m9Oszrj5G.gt330", "*U*U*U*U"}, + {"$6$ojWH1AiTee9x1peC$QVEnTvRVlPRhcLQCk/HnHaZmlGAAjCfrAN0FtOsOnUk5K5Bn/9eLHHiRzrTzaIKjW9NTLNIBUCtNVOowWS2mN.", ""}, + {NULL} +}; + +//Initialization and finalization functions +static void create_clobj(int kpc) { + pinned_saved_keys = clCreateBuffer(context[gpu_id], + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + sizeof(crypt_sha512_password) * kpc, NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_keys"); + + plaintext = (crypt_sha512_password *) clEnqueueMapBuffer(queue[gpu_id], + pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, + sizeof(crypt_sha512_password) * kpc, 0, NULL, NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_plain"); + + pinned_partial_hashes = clCreateBuffer(context[gpu_id], + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + sizeof(crypt_sha512_hash) * kpc, NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_partial_hashes"); + + out_hashes = (crypt_sha512_hash *) clEnqueueMapBuffer(queue[gpu_id], + pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0, + sizeof(crypt_sha512_hash) * kpc, 0, NULL, NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error mapping page-locked memory out_hashes"); + + // create arguments (buffers) + salt_info = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, + sizeof(crypt_sha512_salt), NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error creating data_info out argument"); + + buffer_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, + sizeof(crypt_sha512_password) * kpc, NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys"); + + buffer_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, + sizeof(crypt_sha512_hash) * kpc, NULL, &ret_code); + HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_out"); + + //Set kernel arguments + HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof (cl_mem), + (void *) &salt_info), "Error setting argument 0"); + HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof (cl_mem), + (void *) &buffer_in), "Error setting argument 1"); + HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof (cl_mem), + (void *) &buffer_out), "Error setting argument 2"); + + memset(plaintext, '\0', sizeof(crypt_sha512_password) * kpc); + salt_data.saltlen = 0; + salt_data.rounds = 0; + max_keys_per_crypt = kpc; +} + +static void release_clobj(void) { + cl_int ret_code; + + ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_partial_hashes, + out_hashes, 0, NULL, NULL); + HANDLE_CLERROR(ret_code, "Error Ummapping out_hashes"); + + ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_keys, + plaintext, 0, NULL, NULL); + HANDLE_CLERROR(ret_code, "Error Ummapping saved_plain"); + + ret_code = clReleaseMemObject(salt_info); + HANDLE_CLERROR(ret_code, "Error Releasing data_info"); + ret_code = clReleaseMemObject(buffer_in); + HANDLE_CLERROR(ret_code, "Error Releasing buffer_keys"); + ret_code = clReleaseMemObject(buffer_out); + HANDLE_CLERROR(ret_code, "Error Releasing buffer_out"); + + ret_code = clReleaseMemObject(pinned_saved_keys); + HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_keys"); + + ret_code = clReleaseMemObject(pinned_partial_hashes); + HANDLE_CLERROR(ret_code, "Error Releasing pinned_partial_hashes"); +} + +static void set_key(char *key, int index) { + int len = strlen(key); + plaintext[index].length = len; + memcpy(plaintext[index].v, key, len); +} + +static char *get_key(int index) { + static char ret[PLAINTEXT_LENGTH + 1]; + memcpy(ret, plaintext[index].v, PLAINTEXT_LENGTH); + ret[plaintext[index].length] = '\0'; + return ret; +} + +//Try to find the best configuration in order to get the best performance. +/* -- + This function could be used to calculated the best num + of keys per crypt for the given format +-- */ +static void find_best_workgroup(void) { + cl_event myEvent; + cl_ulong startTime, endTime, kernelExecTimeNs = CL_ULONG_MAX; + size_t my_work_group = 1; + cl_int ret_code; + int i; + size_t max_group_size; + + clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof (max_group_size), &max_group_size, NULL); + queue_prof = clCreateCommandQueue(context[gpu_id], devices[gpu_id], + CL_QUEUE_PROFILING_ENABLE, &ret_code); + printf("Max Group Work Size %d ", (int) max_group_size); + local_work_size = 1; + + // Set keys + for (i = 0; i < KEYS_PER_CRYPT; i++) { + set_key("aaabaabaaa", i); + } + clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_TRUE, 0, + sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL); + clEnqueueWriteBuffer(queue_prof, buffer_in, CL_TRUE, 0, + sizeof (crypt_sha512_password) * KEYS_PER_CRYPT, plaintext, 0, NULL, NULL); + + // Find minimum time + for (my_work_group = 1; (int) my_work_group <= (int) max_group_size; my_work_group *= 2) { + ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel, + 1, NULL, &max_keys_per_crypt, &my_work_group, 0, NULL, &myEvent); + clFinish(queue_prof); + + if (ret_code != CL_SUCCESS) { + printf("Error %d\n", ret_code); ///Better commented by default. + break; + } + //Get profile information + clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, + sizeof (cl_ulong), &startTime, NULL); + clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, + sizeof (cl_ulong), &endTime, NULL); + + if ((endTime - startTime) < kernelExecTimeNs) { + kernelExecTimeNs = endTime - startTime; + local_work_size = my_work_group; + } + } + printf("Optimal local work size %d\n", (int) local_work_size); + printf("(to avoid this test on next run do export LWS=%d)\n", (int) local_work_size); + clReleaseCommandQueue(queue_prof); +} + +/* -- + This function could be used to calculated the best num + of keys per crypt for the given format +-- */ +static void find_best_kpc(void) { + int num; + cl_event myEvent; + cl_ulong startTime, endTime, tmpTime; + int kernelExecTimeNs = 6969; + cl_int ret_code; + int optimal_kpc = MIN_KEYS_PER_CRYPT; + int i; + cl_uint *tmpbuffer; + + printf("Calculating best keys per crypt, this will take a while "); + + for (num = MAX_KEYS_PER_CRYPT; num > MIN_KEYS_PER_CRYPT; num -= 4096) { + release_clobj(); + create_clobj(num); + advance_cursor(); + queue_prof = clCreateCommandQueue(context[gpu_id], devices[gpu_id], + CL_QUEUE_PROFILING_ENABLE, &ret_code); + + // Set keys + for (i = 0; i < num; i++) { + set_key("aaabaabaaa", i); + } + clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0, + sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL); + clEnqueueWriteBuffer(queue_prof, buffer_in, CL_TRUE, 0, + sizeof (crypt_sha512_password) * num, plaintext, 0, NULL, NULL); + + ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel, + 1, NULL, &max_keys_per_crypt, &local_work_size, 0, NULL, &myEvent); + clFinish(queue_prof); + + if (ret_code != CL_SUCCESS) { + printf("Error %d\n", ret_code); + continue; + } + clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, + sizeof (cl_ulong), &startTime, NULL); + clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, + sizeof (cl_ulong), &endTime, NULL); + + tmpTime = endTime - startTime; + tmpbuffer = malloc(sizeof (cl_uint) * num); + + clEnqueueReadBuffer(queue_prof, buffer_out, CL_TRUE, 0, + sizeof (cl_uint) * num, tmpbuffer, 0, NULL, &myEvent); + clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, + sizeof (cl_ulong), &startTime, NULL); + clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, + sizeof (cl_ulong), &endTime, NULL); + tmpTime = tmpTime + (endTime - startTime); + + if (((int) (((float) (tmpTime) / num) * 10)) <= kernelExecTimeNs) { + kernelExecTimeNs = ((int) (((float) (tmpTime) / num) * 10)); + optimal_kpc = num; + } + free(tmpbuffer); + clReleaseCommandQueue(queue_prof); + } + printf("Optimal keys per crypt %d\n", optimal_kpc); + printf("(to avoid this test on next run do \"export KPC=%d\")\n", optimal_kpc); + + max_keys_per_crypt = optimal_kpc; + release_clobj(); + create_clobj(optimal_kpc); +} + +//Startup and clean functions +///TODO: put the auto adjust functions (find_best_kpc) +static void init(struct fmt_main *pFmt) { + char *kpc; + opencl_init("$JOHN/cryptsha512_kernel.cl", gpu_id, platform_id); + + // create kernel to execute + crypt_kernel = clCreateKernel(program[gpu_id], "kernel_crypt", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); + + if (((kpc = getenv("LWS")) == NULL) || (atoi(kpc) == 0)) { + create_clobj(KEYS_PER_CRYPT); + find_best_workgroup(); + release_clobj(); + } else { + local_work_size = atoi(kpc); + } + if ((kpc = getenv("KPC")) == NULL) { + max_keys_per_crypt = KEYS_PER_CRYPT; + create_clobj(KEYS_PER_CRYPT); + } else { + if (atoi(kpc) == 0) { + //user chose to die of boredom + max_keys_per_crypt = KEYS_PER_CRYPT; + create_clobj(KEYS_PER_CRYPT); + find_best_kpc(); + } else { + max_keys_per_crypt = atoi(kpc); + create_clobj(max_keys_per_crypt); + } + } + printf("Local work size (LWS) %d, Keys per crypt (KPC) %Zd\n", + (int) local_work_size, max_keys_per_crypt); + pFmt->params.max_keys_per_crypt = max_keys_per_crypt; +} + +//Check if the ciphertext if a valid SHA-512 crypt +static int valid(char *ciphertext, struct fmt_main *pFmt) { + uint32_t i, j; + int len = strlen(ciphertext); + + if (strncmp(ciphertext, "$6$", 3) != 0) + return 0; + char *p = strrchr(ciphertext, '$'); + if (p == NULL) + return 0; + for (i = p - ciphertext + 1; i < len; i++) { + int found = 0; + for (j = 0; j < 64; j++) + if (itoa64[j] == ARCH_INDEX(ciphertext[i])) + found = 1; + if (found == 0) { + puts("not found"); + return 0; + } + } + if (len - (p - ciphertext + 1) != 86) + return 0; + return 1; +} + +//Deals with salt information +static void *get_salt(char *ciphertext) { + int end = 0, i, len = strlen(ciphertext); + for (i = len - 1; i >= 0; i--) + if (ciphertext[i] == '$') { + end = i; + break; + } + + static unsigned char ret[50]; + for (i = 0; i < end; i++) + ret[i] = ciphertext[i]; + ret[end] = 0; + return (void *) ret; +} + +static void set_salt(void *salt) { + unsigned char *s = salt; + int len = strlen(salt); + static char currentsalt[64]; + memcpy(currentsalt, s, len + 1); + unsigned char offset = 0; + salt_data.rounds = ROUNDS_DEFAULT; + + if (strncmp((char *) "$6$", (char *) currentsalt, 3) == 0) + offset += 3; + + if (strncmp((char *) currentsalt + offset, (char *) "rounds=", 7) == 0) { + const char *num = currentsalt + offset + 7; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + + if (*endp == '$') { + endp += 1; + salt_data.rounds = + MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX)); + } + offset = endp - currentsalt; + } + memcpy(salt_data.salt, currentsalt + offset, 16); + salt_data.saltlen = strlen((char *) salt_data.salt); +} + +//To binary functions +///TODO: check which is the best aproach, this ou cryptsha512.c +static int findb64(char c) { + int ret = ARCH_INDEX(atoi64[(uint8_t) c]); + return ret != 0x7f ? ret : 0; +} + +static void magic(char *crypt, unsigned char *alt) { +#define _24bit_from_b64(I,B2,B1,B0) \ + {\ + unsigned char c1=findb64(crypt[I+0]);\ + unsigned char c2=findb64(crypt[I+1]);\ + unsigned char c3=findb64(crypt[I+2]);\ + unsigned char c4=findb64(crypt[I+3]);\ + unsigned int w=c4<<18|c3<<12|c2<<6|c1;\ + unsigned char b2=w&0xff;w>>=8;\ + unsigned char b1=w&0xff;w>>=8;\ + unsigned char b0=w&0xff;w>>=8;\ + alt[B2]=b0;\ + alt[B1]=b1;\ + alt[B0]=b2;\ + } + _24bit_from_b64(0, 0, 21, 42); + _24bit_from_b64(4, 22, 43, 1); + _24bit_from_b64(8, 44, 2, 23); + _24bit_from_b64(12, 3, 24, 45); + _24bit_from_b64(16, 25, 46, 4); + _24bit_from_b64(20, 47, 5, 26); + _24bit_from_b64(24, 6, 27, 48); + _24bit_from_b64(28, 28, 49, 7); + _24bit_from_b64(32, 50, 8, 29); + _24bit_from_b64(36, 9, 30, 51); + _24bit_from_b64(40, 31, 52, 10); + _24bit_from_b64(44, 53, 11, 32); + _24bit_from_b64(48, 12, 33, 54); + _24bit_from_b64(52, 34, 55, 13); + _24bit_from_b64(56, 56, 14, 35); + _24bit_from_b64(60, 15, 36, 57); + _24bit_from_b64(64, 37, 58, 16); + _24bit_from_b64(68, 59, 17, 38); + _24bit_from_b64(72, 18, 39, 60); + _24bit_from_b64(76, 40, 61, 19); + _24bit_from_b64(80, 62, 20, 41); + + uint32_t w = findb64(crypt[85]) << 6 | findb64(crypt[84]) << 0; + alt[63] = (w & 0xff); +} + +static void * get_binary(char *ciphertext) { + static unsigned char b[BINARY_SIZE]; + memset(b, 0, BINARY_SIZE); + char *p = strrchr(ciphertext, '$'); + + if (p != NULL) + magic(p + 1, b); + return (void *) b; +} + +//Compare functins +static int cmp_all(void *binary, int count) { + uint32_t i; + uint64_t b = ((uint64_t *) binary)[0]; + + for (i = 0; i < count; i++) + if (b == out_hashes[i].v[0]) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) { //TODO: Check Samuele work. + int i; + uint64_t *t = (uint64_t *) binary; + + for (i = 0; i < 8; i++) { + if (t[i] != out_hashes[index].v[i]) + return 0; + } + return 1; +} + +static int cmp_exact(char *source, int count) { //TODO: Check Samuele work. + return 1; +} + +//Crypt function +static void crypt_all(int count) { + //Send data to the dispositive + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0, + sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL), + "failed in clEnqueueWriteBuffer data_info"); + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_in, CL_FALSE, 0, + sizeof(crypt_sha512_password) * max_keys_per_crypt, plaintext, 0, NULL, NULL), + "failed in clEnqueueWriteBuffer buffer_in"); + + //Enqueue the kernel + HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, + &max_keys_per_crypt, &local_work_size, 0, NULL, NULL), + "failed in clEnqueueNDRangeKernel"); + + //Read back hashes + HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_out, CL_FALSE, 0, + sizeof (crypt_sha512_hash) * max_keys_per_crypt, out_hashes, 0, NULL, NULL), + "failed in reading data back"); + + //Do the work + HANDLE_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish"); +} + +//Binary Hash functions group. +static int binary_hash_0(void * binary) { + uint32_t *bin=binary; + printf("bin "); + int i; + for(i=0;i<8;i++) + printf("%08x ",bin[i]); + puts(" (Correct)"); + return ((ARCH_WORD_32 *) binary)[0] & 0xF; } +static int binary_hash_1(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFF; } +static int binary_hash_2(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFF; } +static int binary_hash_3(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFF; } +static int binary_hash_4(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFFF; } +static int binary_hash_5(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFFFF; } +static int binary_hash_6(void * binary) { return *(ARCH_WORD_32 *) binary & 0x7FFFFFF; } + +//Get Hash functions group. +static int get_hash_0(int index) { + printf("\nget "); + int i; + for(i=0;i<8;i++) + printf("%08x ",out_hashes[index].v[i]); + puts(" "); + + return out_hashes[index].v[0] & 0xF; } +static int get_hash_1(int index) { return out_hashes[index].v[0] & 0xFF; } +static int get_hash_2(int index) { return out_hashes[index].v[0] & 0xFFF; } +static int get_hash_3(int index) { return out_hashes[index].v[0] & 0xFFFF; } +static int get_hash_4(int index) { return out_hashes[index].v[0] & 0xFFFFF; } +static int get_hash_5(int index) { return out_hashes[index].v[0] & 0xFFFFFF; } +static int get_hash_6(int index) { return out_hashes[index].v[0] & 0x7FFFFFF; } + +//Format structure +struct fmt_main fmt_opencl_cryptsha512 = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + PLAINTEXT_LENGTH, + BINARY_SIZE, + SALT_SIZE, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT, + tests + }, + { + init, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + binary_hash_0, + binary_hash_1, + binary_hash_2, + binary_hash_3, + binary_hash_4, + binary_hash_5, + binary_hash_6 + }, + fmt_default_salt_hash, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { + get_hash_0, + get_hash_1, + get_hash_2, + get_hash_3, + get_hash_4, + get_hash_5, + get_hash_6 + }, + cmp_all, + cmp_one, + cmp_exact + } +}; -- 1.7.5.4