[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date: Sat, 10 Mar 2012 13:46:24 -0300
From: Claudio André <claudioandre.br@...il.com>
To: john-users@...ts.openwall.com
Subject: Re: Help on SHA-512
Sorry, attached.
I'll ask there.
Thanks
---
PS 1: i'm using an AMD opencl extension. You can delete line: #pragma
OPENCL EXTENSION cl_amd_printf : enable
PS 2: if you have problems in cryptsha512.cl (because of) remove line:
#include "opencl_cryptsha512.h"
PS 3: And copy the included content in cryptsha512.cl file.
PS #: if you like to help but have problems on opencl stuff, please ask.
Em 10-03-2012 13:10, Solar Designer escreveu:
> Hi Claudio,
>
> On Sat, Mar 10, 2012 at 12:56:38PM -0300, Claudio Andr? wrote:
>> I'm working on OpenCL SHA-512 on John [1]. My code has some problem (see
>> output below). Seems i'm failing on one small detail (and the devil
>> lives on the details). Half of it works is fine, half not.
>>
>> If someone knows enough about SHA-512 and kindly can help i really
>> appreciate.
>>
>> The final result is going to be submitted, so your are helping John
>> community/users.
> Sounds great, but you need to include your code and post to john-dev.
> Please do. Thank you!
>
> I admit that there's a small chance that someone on john-users who is
> not also on john-dev will happen to recognize the pattern without even
> having seen the code, but this is a john-dev topic.
>
> Thanks again,
>
> Alexander
diff --git a/src/Makefile b/src/Makefile
index 446a30d..3ac5552 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -119,7 +119,8 @@
OCL_OBJS = \
common-opencl.o opencl_mysqlsha1_fmt.o \
cryptmd5_opencl_fmt.o phpass_opencl_fmt.o opencl_rawsha1_fmt.o \
- opencl_nt_fmt.o opencl_rawmd5_fmt.o opencl_nsldaps_fmt.o
+ opencl_nt_fmt.o opencl_rawmd5_fmt.o opencl_nsldaps_fmt.o \
+ opencl_cryptsha512_fmt.o
CUDA_OBJS = \
cuda_common.o \
@@ -318,6 +319,7 @@
CFLAGS="$(CFLAGS) -I$(OCLROOT)/include -I$(OCLROOT)/include -DHAVE_CRYPT -DCL_VERSION_1_0 -DHAVE_DL" \
LDFLAGS="$(LDFLAGS) -L$(OCLROOT)/lib/x86_64 -L$(OCLROOT)/lib64 -lcrypt -lOpenCL -ldl"
$(CP) opencl/*.cl ../run/
+ $(CP) opencl_cryptsha512.h ../run/
linux-x86-64-cuda:
$(LN) x86-64.h arch.h
diff --git a/src/common-opencl.c b/src/common-opencl.c
index 02f2d6e..96dfec2 100644
--- a/src/common-opencl.c
+++ b/src/common-opencl.c
@@ -83,6 +83,14 @@
HANDLE_CLERROR(ret_code, "Error creating command queue");
}
+static char * include_source(char *pathname)
+{
+ static char include[PATH_BUFFER_SIZE];
+ sprintf(include, "-I %s", path_expand(pathname));
+
+ return include;
+}
+
static void build_kernel(int dev_id)
{
@@ -94,7 +102,8 @@
HANDLE_CLERROR(ret_code, "Error while creating program");
cl_int build_code;
- build_code = clBuildProgram(program[dev_id], 0, NULL, "", NULL, NULL);
+ build_code = clBuildProgram(program[dev_id], 0, NULL,
+ include_source("$JOHN/"), NULL, NULL);
HANDLE_CLERROR(clGetProgramBuildInfo(program[dev_id], devices[dev_id],
CL_PROGRAM_BUILD_LOG, sizeof(opencl_log), (void *) opencl_log,
diff --git a/src/john.c b/src/john.c
index b40ec1a..6840128 100644
--- a/src/john.c
+++ b/src/john.c
@@ -110,6 +110,7 @@
extern struct fmt_main fmt_opencl_cryptMD5;
extern struct fmt_main fmt_opencl_phpass;
extern struct fmt_main fmt_opencl_mysqlsha1;
+extern struct fmt_main fmt_opencl_cryptsha512;
#endif
#ifdef HAVE_CUDA
extern struct fmt_main fmt_cuda_cryptmd5;
@@ -223,6 +224,7 @@
john_register_one(&fmt_opencl_cryptMD5);
john_register_one(&fmt_opencl_phpass);
john_register_one(&fmt_opencl_mysqlsha1);
+ john_register_one(&fmt_opencl_cryptsha512);
#endif
#ifdef HAVE_CUDA
diff --git a/src/opencl/cryptsha512.cl b/src/opencl/cryptsha512.cl
new file mode 100644
index 0000000..521da80
--- /dev/null
+++ b/src/opencl/cryptsha512.cl
@@ -0,0 +1,346 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+
+#include "opencl_cryptsha512.h"
+#pragma OPENCL EXTENSION cl_amd_printf : enable
+
+__constant uint64_t k[] = {
+ 0x428a2f98d728ae22LL, 0x7137449123ef65cdLL, 0xb5c0fbcfec4d3b2fLL,
+ 0xe9b5dba58189dbbcLL,
+ 0x3956c25bf348b538LL, 0x59f111f1b605d019LL, 0x923f82a4af194f9bLL,
+ 0xab1c5ed5da6d8118LL,
+ 0xd807aa98a3030242LL, 0x12835b0145706fbeLL, 0x243185be4ee4b28cLL,
+ 0x550c7dc3d5ffb4e2LL,
+ 0x72be5d74f27b896fLL, 0x80deb1fe3b1696b1LL, 0x9bdc06a725c71235LL,
+ 0xc19bf174cf692694LL,
+ 0xe49b69c19ef14ad2LL, 0xefbe4786384f25e3LL, 0x0fc19dc68b8cd5b5LL,
+ 0x240ca1cc77ac9c65LL,
+ 0x2de92c6f592b0275LL, 0x4a7484aa6ea6e483LL, 0x5cb0a9dcbd41fbd4LL,
+ 0x76f988da831153b5LL,
+ 0x983e5152ee66dfabLL, 0xa831c66d2db43210LL, 0xb00327c898fb213fLL,
+ 0xbf597fc7beef0ee4LL,
+ 0xc6e00bf33da88fc2LL, 0xd5a79147930aa725LL, 0x06ca6351e003826fLL,
+ 0x142929670a0e6e70LL,
+ 0x27b70a8546d22ffcLL, 0x2e1b21385c26c926LL, 0x4d2c6dfc5ac42aedLL,
+ 0x53380d139d95b3dfLL,
+ 0x650a73548baf63deLL, 0x766a0abb3c77b2a8LL, 0x81c2c92e47edaee6LL,
+ 0x92722c851482353bLL,
+ 0xa2bfe8a14cf10364LL, 0xa81a664bbc423001LL, 0xc24b8b70d0f89791LL,
+ 0xc76c51a30654be30LL,
+ 0xd192e819d6ef5218LL, 0xd69906245565a910LL, 0xf40e35855771202aLL,
+ 0x106aa07032bbd1b8LL,
+ 0x19a4c116b8d2d0c8LL, 0x1e376c085141ab53LL, 0x2748774cdf8eeb99LL,
+ 0x34b0bcb5e19b48a8LL,
+ 0x391c0cb3c5c95a63LL, 0x4ed8aa4ae3418acbLL, 0x5b9cca4f7763e373LL,
+ 0x682e6ff3d6b2b8a3LL,
+ 0x748f82ee5defb2fcLL, 0x78a5636f43172f60LL, 0x84c87814a1f0ab72LL,
+ 0x8cc702081a6439ecLL,
+ 0x90befffa23631e28LL, 0xa4506cebde82bde9LL, 0xbef9a3f7b2c67915LL,
+ 0xc67178f2e372532bLL,
+ 0xca273eceea26619cLL, 0xd186b8c721c0c207LL, 0xeada7dd6cde0eb1eLL,
+ 0xf57d4f7fee6ed178LL,
+ 0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, 0x113f9804bef90daeLL,
+ 0x1b710b35131c471bLL,
+ 0x28db77f523047d84LL, 0x32caab7b40c72493LL, 0x3c9ebe0a15c9bebcLL,
+ 0x431d67c49c100d4cLL,
+ 0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL,
+ 0x6c44198c4a475817LL,
+};
+
+void init_ctx(sha512_ctx * ctx) {
+ ctx->H[0] = 0x6a09e667f3bcc908LL;
+ ctx->H[1] = 0xbb67ae8584caa73bLL;
+ ctx->H[2] = 0x3c6ef372fe94f82bLL;
+ ctx->H[3] = 0xa54ff53a5f1d36f1LL;
+ ctx->H[4] = 0x510e527fade682d1LL;
+ ctx->H[5] = 0x9b05688c2b3e6c1fLL;
+ ctx->H[6] = 0x1f83d9abfb41bd6bLL;
+ ctx->H[7] = 0x5be0cd19137e2179LL;
+ ctx->total = 0;
+ ctx->buflen = 0;
+}
+
+void memcpy_1(uint8_t * dest, const uint8_t * src, const size_t n) {
+ for (int i = 0; i < n; i++)
+ dest[i] = src[i];
+}
+
+void memcpy (uint8_t * dest, buffer_64 * src, const size_t n) {
+ for (int i = 0; i < n; i++)
+ dest[i] = src->mem_08[i];
+}
+
+void depura(buffer_64 * ctx)
+{return;
+ printf("\nCtx: ");
+ for(int i=0;i<8;i++)
+ printf("%08x ",ctx[i]);
+}
+
+void debug_ctx(sha512_ctx * ctx)
+{
+ printf("\nCtx: ");
+ int i;
+ for(i=0;i<8;i++)
+ printf("%08x ",ctx->H[i]);
+ printf(" <%03d>",ctx->total);
+ printf(" <%03d>",ctx->buflen);
+}
+
+void insert_to_buffer(sha512_ctx * ctx, const uint8_t * string,
+ const uint8_t len) {
+ uint8_t *d = ctx->buffer->mem_08 + ctx->buflen; //Position ctx->buflen (in char size)
+ memcpy_1(d, string, len);
+ ctx->buflen += len;
+}
+
+void sha512_block(sha512_ctx * ctx) {
+ int i;
+ uint64_t a = ctx->H[0];
+ uint64_t b = ctx->H[1];
+ uint64_t c = ctx->H[2];
+ uint64_t d = ctx->H[3];
+ uint64_t e = ctx->H[4];
+ uint64_t f = ctx->H[5];
+ uint64_t g = ctx->H[6];
+ uint64_t h = ctx->H[7];
+
+ uint64_t w[16];
+
+ uint64_t *data = ctx->buffer->mem_64; //The same as buffer[0]
+ //#pragma unroll 16
+ for (i = 0; i < 16; i++)
+ w[i] = SWAP64(data[i]);
+
+ uint64_t t1, t2;
+ //#pragma unroll 16
+ for (i = 0; i < 16; i++) {
+
+
+ t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
+ t2 = Maj(a, b, c) + Sigma0(a);
+
+ h = g;
+ g = f;
+ f = e;
+ e = d + t1;
+ d = c;
+ c = b;
+ b = a;
+ a = t1 + t2;
+
+ }
+
+
+ for (i = 16; i < 80; i++) {
+
+
+ w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
+ t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
+ t2 = Maj(a, b, c) + Sigma0(a);
+
+ h = g;
+ g = f;
+ f = e;
+ e = d + t1;
+ d = c;
+ c = b;
+ b = a;
+ a = t1 + t2;
+
+ }
+
+ ctx->H[0] += a;
+ ctx->H[1] += b;
+ ctx->H[2] += c;
+ ctx->H[3] += d;
+ ctx->H[4] += e;
+ ctx->H[5] += f;
+ ctx->H[6] += g;
+ ctx->H[7] += h;
+}
+
+void ctx_append_1(sha512_ctx * ctx) {
+ uint32_t length = ctx->buflen;
+ int i = 127 - length;
+ uint8_t *d = ctx->buffer->mem_08 + length;
+ *d++ = 0x80;
+
+ while (i--) {
+ d[i] = 0;
+ }
+}
+
+void ctx_add_length(sha512_ctx * ctx) {
+ uint64_t *blocks = ctx->buffer->mem_64;
+ blocks[15] = SWAP64((uint64_t) (ctx->total * 8));
+}
+
+void finish_ctx(sha512_ctx * ctx) {
+ ctx_append_1(ctx);
+ ctx_add_length(ctx);
+ ctx->buflen = 0;
+}
+
+void ctx_update(sha512_ctx * ctx, uint8_t *string, uint8_t len) {
+ ctx->total += len;
+ uint8_t startpos = ctx->buflen;
+ uint8_t partsize;
+ if (startpos + len <= 128) {
+ partsize = len;
+ } else
+ partsize = 128 - startpos;
+
+ insert_to_buffer(ctx, string, partsize);
+ if (ctx->buflen == 128) {
+ uint8_t offset = 128 - startpos;
+ sha512_block(ctx);
+ ctx->buflen = 0;
+ insert_to_buffer(ctx, (string + offset), len - offset);
+ }
+}
+
+void clear_ctx_buffer(sha512_ctx * ctx) {
+
+ uint32_t *w = ctx->buffer->mem_32;
+ //#pragma unroll 30
+ for (int i = 0; i < 30; i++) //TODO: why 30? Not 32?
+ w[i] = 0;
+
+ ctx->buflen = 0;
+}
+
+void sha512_digest(sha512_ctx * ctx, uint64_t * result) {
+ uint8_t i;
+ if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block
+ finish_ctx(ctx);
+ sha512_block(ctx);
+ } else {
+ uint8_t moved = 1;
+ if (ctx->buflen < 128) { //data and 0x80 fits in one block
+ ctx_append_1(ctx);
+ moved = 0;
+ }
+ sha512_block(ctx);
+ clear_ctx_buffer(ctx);
+ if (moved)
+ ctx->buffer->mem_08[0] = 0x80; //append 1,the rest is already clean
+ ctx_add_length(ctx);
+ sha512_block(ctx);
+ }
+ //#pragma unroll 8
+ for (i = 0; i < 8; i++)
+ result[i] = SWAP64(ctx->H[i]);
+}
+
+void sha512crypt(uint8_t *pass, uint8_t passlength,
+ crypt_sha512_salt cuda_salt,
+ __global crypt_sha512_hash * output) {
+
+ buffer_64 alt_result[8], temp_result[8];
+ int i;
+ sha512_ctx ctx;
+ init_ctx(&ctx);
+
+ ctx_update(&ctx, pass, passlength);
+ ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen);
+ ctx_update(&ctx, pass, passlength);
+
+ sha512_digest(&ctx, alt_result->mem_64);
+ init_ctx(&ctx);
+depura(alt_result); ;//TODO: #################33
+ ctx_update(&ctx, pass, passlength);
+ ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen);
+ ctx_update(&ctx, alt_result->mem_08, passlength);
+
+depura(alt_result); ;//TODO: #################33
+
+ for (i = passlength; i > 0; i >>= 1) {
+ if ((i & 1) != 0)
+ ctx_update(&ctx, alt_result->mem_08, 64);
+ else
+ ctx_update(&ctx, pass, passlength);
+ }
+ sha512_digest(&ctx, alt_result->mem_64);
+
+
+ init_ctx(&ctx);
+ for (i = 0; i < passlength; i++)
+ ctx_update(&ctx, pass, passlength);
+
+ sha512_digest(&ctx, temp_result->mem_64);
+
+ uint8_t sp_sequence[16 + 4];
+ uint8_t *p_sequence = sp_sequence;
+ memcpy(p_sequence, temp_result, passlength);
+
+ init_ctx(&ctx);
+
+ /* For every character in the password add the entire password. */
+ for (i = 0; i < 16 + (alt_result->mem_08)[0]; i++) //Analyse, TÁ CERTO?###
+ ctx_update(&ctx, cuda_salt.salt, cuda_salt.saltlen);
+
+ /* Finish the digest. */
+ sha512_digest(&ctx, temp_result->mem_64);
+
+ uint8_t saltlength = cuda_salt.saltlen;
+
+ uint8_t ss_sequence[16 + 4];
+ uint8_t *s_sequence = ss_sequence;
+ memcpy(s_sequence, temp_result, saltlength);
+
+ /* Repeatedly run the collected hash value through SHA512 to
+ burn CPU cycles. */
+ for (i = 0; i < cuda_salt.rounds; i++) {
+ init_ctx(&ctx);
+
+ if ((i & 1) != 0)
+ ctx_update(&ctx, p_sequence, passlength);
+ else
+ ctx_update(&ctx, alt_result->mem_08, 64);
+
+ if ((i % 3) != 0)
+ ctx_update(&ctx, s_sequence, saltlength);
+
+ if ((i % 7) != 0)
+ ctx_update(&ctx, p_sequence, passlength);
+
+ if ((i & 1) != 0)
+ ctx_update(&ctx, alt_result->mem_08, 64);
+ else
+ ctx_update(&ctx, p_sequence, passlength);
+
+ sha512_digest(&ctx, alt_result->mem_64);
+ }
+ //#pragma unroll 8
+ for (i = 0; i < 8; i++)
+ output->v[i] = alt_result[i].mem_64[0];
+}
+
+__kernel void kernel_crypt(__constant crypt_sha512_salt * hsalt,
+ __constant crypt_sha512_password * inbuffer,
+ __global crypt_sha512_hash * outbuffer) {
+
+ uint8_t pass[PLAINTEXT_LENGTH];
+ crypt_sha512_salt salt_data;
+
+ //Get the task to be done
+ uint32_t idx = get_global_id(0);
+
+ //Get password information, put in faster memory.
+ for (int i = 0; i < inbuffer[idx].length; i++)
+ pass[i] = inbuffer[idx].v[i];
+
+ //Get salt information, put in faster memory.
+ salt_data.saltlen = hsalt->saltlen;
+ salt_data.rounds = hsalt->rounds;
+
+ for (int i = 0; i < salt_data.saltlen; i++)
+ salt_data.salt[i] = hsalt->salt[i];
+
+ //Do the job
+ sha512crypt(pass, inbuffer[idx].length, salt_data, &outbuffer[idx]);
+}
\ No newline at end of file
diff --git a/src/opencl_cryptsha512_fmt.c b/src/opencl_cryptsha512_fmt.c
new file mode 100644
index 0000000..ea54b15
--- /dev/null
+++ b/src/opencl_cryptsha512_fmt.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2011 Samuele Giovanni Tonon
+ * samu at linuxasylum dot net
+ * This program comes with ABSOLUTELY NO WARRANTY; express or
+ * implied .
+ * This is free software, and you are welcome to redistribute it
+ * under certain conditions; as expressed here
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ */
+
+#include <string.h>
+#include "common-opencl.h"
+#include "opencl_cryptsha512.h"
+
+
+#define FORMAT_LABEL "cryptsha512-opencl"
+#define FORMAT_NAME "crypt SHA-512 OpenCL"
+
+#if ARCH_BITS >= 64
+#define ALGORITHM_NAME "OpenSSL 64/" ARCH_BITS_STR
+#else
+#define ALGORITHM_NAME "OpenSSL 32/" ARCH_BITS_STR
+#endif
+
+#define BENCHMARK_COMMENT " rounds=5000"
+#define BENCHMARK_LENGTH -1
+
+#define BINARY_SIZE (3+16+86) ///TODO: Magic number?
+
+#define MIN_KEYS_PER_CRYPT 1024
+#define MAX_KEYS_PER_CRYPT KEYS_PER_CRYPT
+
+static crypt_sha512_password *plaintext; // plaintext ciphertexts
+static crypt_sha512_hash *out_hashes; // calculated hashes
+static crypt_sha512_salt salt_data;
+
+cl_mem salt_info; //Salt information.
+cl_mem buffer_in; //Plaintext buffer.
+cl_mem buffer_out; //Hash keys (output)
+cl_mem pinned_saved_keys, pinned_partial_hashes;
+
+cl_command_queue queue_prof;
+cl_kernel crypt_kernel;
+
+static size_t max_keys_per_crypt = KEYS_PER_CRYPT;
+
+static struct fmt_tests tests[] = {
+ //{"$6$saltstring$svn8UoSVapNtMuq1ukKS4tPQd8iKwSMHWjl/O817G3uBnIFNjnQJuesI68u4OTLiBFdcbYEdFCoEOfaS35inz1", "Hello world!"},
+ {"$6$LKO/Ute40T3FNF95$6S/6T2YuOIHY0N3XpLKABJ3soYcXD9mB7uVbtEZDj/LNscVhZoZ9DEH.sBciDrMsHOWOoASbNLTypH/5X26gN0", "U*U*U*U*"},
+ //{"$6$LKO/Ute40T3FNF95$wK80cNqkiAUzFuVGxW6eFe8J.fSVI65MD5yEm8EjYMaJuDrhwe5XXpHDJpwF/kY.afsUs1LlgQAaOapVNbggZ1", "U*U***U"},
+ //{"$6$OmBOuxFYBZCYAadG$WCckkSZok9xhp4U1shIZEV7CCVwQUwMVea7L3A77th6SaE9jOPupEMJB.z0vIWCDiN9WLh2m9Oszrj5G.gt330", "*U*U*U*U"},
+ //{"$6$ojWH1AiTee9x1peC$QVEnTvRVlPRhcLQCk/HnHaZmlGAAjCfrAN0FtOsOnUk5K5Bn/9eLHHiRzrTzaIKjW9NTLNIBUCtNVOowWS2mN.", ""},
+ {NULL}
+};
+
+//Initialization and finalization functions
+static void create_clobj(int kpc) {
+ pinned_saved_keys = clCreateBuffer(context[gpu_id],
+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+ sizeof(crypt_sha512_password) * kpc, NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_keys");
+
+ plaintext = (crypt_sha512_password *) clEnqueueMapBuffer(queue[gpu_id],
+ pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0,
+ sizeof(crypt_sha512_password) * kpc, 0, NULL, NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_plain");
+
+ pinned_partial_hashes = clCreateBuffer(context[gpu_id],
+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+ sizeof(crypt_sha512_hash) * kpc, NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_partial_hashes");
+
+ out_hashes = (crypt_sha512_hash *) clEnqueueMapBuffer(queue[gpu_id],
+ pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0,
+ sizeof(crypt_sha512_hash) * kpc, 0, NULL, NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error mapping page-locked memory out_hashes");
+
+ // create arguments (buffers)
+ salt_info = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
+ sizeof(crypt_sha512_salt), NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating data_info out argument");
+
+ buffer_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
+ sizeof(crypt_sha512_password) * kpc, NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys");
+
+ buffer_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY,
+ sizeof(crypt_sha512_hash) * kpc, NULL, &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_out");
+
+ //Set kernel arguments
+ HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof (cl_mem),
+ (void *) &salt_info), "Error setting argument 0");
+ HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof (cl_mem),
+ (void *) &buffer_in), "Error setting argument 1");
+ HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof (cl_mem),
+ (void *) &buffer_out), "Error setting argument 2");
+
+ memset(plaintext, '\0', sizeof(crypt_sha512_password) * kpc);
+ salt_data.saltlen = 0;
+ salt_data.rounds = 0;
+ max_keys_per_crypt = kpc;
+}
+
+static void release_clobj(void) {
+ cl_int ret_code;
+
+ ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_partial_hashes,
+ out_hashes, 0, NULL, NULL);
+ HANDLE_CLERROR(ret_code, "Error Ummapping out_hashes");
+
+ ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_keys,
+ plaintext, 0, NULL, NULL);
+ HANDLE_CLERROR(ret_code, "Error Ummapping saved_plain");
+
+ ret_code = clReleaseMemObject(salt_info);
+ HANDLE_CLERROR(ret_code, "Error Releasing data_info");
+ ret_code = clReleaseMemObject(buffer_in);
+ HANDLE_CLERROR(ret_code, "Error Releasing buffer_keys");
+ ret_code = clReleaseMemObject(buffer_out);
+ HANDLE_CLERROR(ret_code, "Error Releasing buffer_out");
+
+ ret_code = clReleaseMemObject(pinned_saved_keys);
+ HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_keys");
+
+ ret_code = clReleaseMemObject(pinned_partial_hashes);
+ HANDLE_CLERROR(ret_code, "Error Releasing pinned_partial_hashes");
+}
+
+static void set_key(char *key, int index) {
+ int len = strlen(key);
+ plaintext[index].length = len;
+ memcpy(plaintext[index].v, key, len);
+}
+
+static char *get_key(int index) {
+ static char ret[PLAINTEXT_LENGTH + 1];
+ memcpy(ret, plaintext[index].v, PLAINTEXT_LENGTH);
+ ret[plaintext[index].length] = '\0';
+ return ret;
+}
+
+//Try to find the best configuration in order to get the best performance.
+/* --
+ This function could be used to calculated the best num
+ of keys per crypt for the given format
+-- */
+static void find_best_workgroup(void) {
+ cl_event myEvent;
+ cl_ulong startTime, endTime, kernelExecTimeNs = CL_ULONG_MAX;
+ size_t my_work_group = 1;
+ cl_int ret_code;
+ int i;
+ size_t max_group_size;
+
+ clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof (max_group_size), &max_group_size, NULL);
+ queue_prof = clCreateCommandQueue(context[gpu_id], devices[gpu_id],
+ CL_QUEUE_PROFILING_ENABLE, &ret_code);
+ printf("Max Group Work Size %d ", (int) max_group_size);
+ local_work_size = 1;
+
+ // Set keys
+ for (i = 0; i < KEYS_PER_CRYPT; i++) {
+ set_key("aaabaabaaa", i);
+ }
+ clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_TRUE, 0,
+ sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue_prof, buffer_in, CL_TRUE, 0,
+ sizeof (crypt_sha512_password) * KEYS_PER_CRYPT, plaintext, 0, NULL, NULL);
+
+ // Find minimum time
+ for (my_work_group = 1; (int) my_work_group <= (int) max_group_size; my_work_group *= 2) {
+ ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel,
+ 1, NULL, &max_keys_per_crypt, &my_work_group, 0, NULL, &myEvent);
+ clFinish(queue_prof);
+
+ if (ret_code != CL_SUCCESS) {
+ printf("Error %d\n", ret_code); ///Better commented by default.
+ break;
+ }
+ //Get profile information
+ clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
+ sizeof (cl_ulong), &startTime, NULL);
+ clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
+ sizeof (cl_ulong), &endTime, NULL);
+
+ if ((endTime - startTime) < kernelExecTimeNs) {
+ kernelExecTimeNs = endTime - startTime;
+ local_work_size = my_work_group;
+ }
+ }
+ printf("Optimal local work size %d\n", (int) local_work_size);
+ printf("(to avoid this test on next run do export LWS=%d)\n", (int) local_work_size);
+ clReleaseCommandQueue(queue_prof);
+}
+
+/* --
+ This function could be used to calculated the best num
+ of keys per crypt for the given format
+-- */
+static void find_best_kpc(void) {
+ int num;
+ cl_event myEvent;
+ cl_ulong startTime, endTime, tmpTime;
+ int kernelExecTimeNs = 6969;
+ cl_int ret_code;
+ int optimal_kpc = MIN_KEYS_PER_CRYPT;
+ int i;
+ cl_uint *tmpbuffer;
+
+ printf("Calculating best keys per crypt, this will take a while ");
+
+ for (num = MAX_KEYS_PER_CRYPT; num > MIN_KEYS_PER_CRYPT; num -= 4096) {
+ release_clobj();
+ create_clobj(num);
+ advance_cursor();
+ queue_prof = clCreateCommandQueue(context[gpu_id], devices[gpu_id],
+ CL_QUEUE_PROFILING_ENABLE, &ret_code);
+
+ // Set keys
+ for (i = 0; i < num; i++) {
+ set_key("aaabaabaaa", i);
+ }
+ clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0,
+ sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue_prof, buffer_in, CL_TRUE, 0,
+ sizeof (crypt_sha512_password) * num, plaintext, 0, NULL, NULL);
+
+ ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel,
+ 1, NULL, &max_keys_per_crypt, &local_work_size, 0, NULL, &myEvent);
+ clFinish(queue_prof);
+
+ if (ret_code != CL_SUCCESS) {
+ printf("Error %d\n", ret_code);
+ continue;
+ }
+ clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
+ sizeof (cl_ulong), &startTime, NULL);
+ clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
+ sizeof (cl_ulong), &endTime, NULL);
+
+ tmpTime = endTime - startTime;
+ tmpbuffer = malloc(sizeof (cl_uint) * num);
+
+ clEnqueueReadBuffer(queue_prof, buffer_out, CL_TRUE, 0,
+ sizeof (cl_uint) * num, tmpbuffer, 0, NULL, &myEvent);
+ clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
+ sizeof (cl_ulong), &startTime, NULL);
+ clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
+ sizeof (cl_ulong), &endTime, NULL);
+ tmpTime = tmpTime + (endTime - startTime);
+
+ if (((int) (((float) (tmpTime) / num) * 10)) <= kernelExecTimeNs) {
+ kernelExecTimeNs = ((int) (((float) (tmpTime) / num) * 10));
+ optimal_kpc = num;
+ }
+ free(tmpbuffer);
+ clReleaseCommandQueue(queue_prof);
+ }
+ printf("Optimal keys per crypt %d\n", optimal_kpc);
+ printf("(to avoid this test on next run do \"export KPC=%d\")\n", optimal_kpc);
+
+ max_keys_per_crypt = optimal_kpc;
+ release_clobj();
+ create_clobj(optimal_kpc);
+}
+
+//Startup and clean functions
+///TODO: put the auto adjust functions (find_best_kpc)
+static void init(struct fmt_main *pFmt) {
+ char *kpc;
+ opencl_init("$JOHN/cryptsha512.cl", gpu_id, platform_id);
+
+ // create kernel to execute
+ crypt_kernel = clCreateKernel(program[gpu_id], "kernel_crypt", &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?");
+
+ if (((kpc = getenv("LWS")) == NULL) || (atoi(kpc) == 0)) {
+ create_clobj(KEYS_PER_CRYPT);
+ find_best_workgroup();
+ release_clobj();
+ } else {
+ local_work_size = atoi(kpc);
+ }
+ if ((kpc = getenv("KPC")) == NULL) {
+ max_keys_per_crypt = KEYS_PER_CRYPT;
+ create_clobj(KEYS_PER_CRYPT);
+ } else {
+ if (atoi(kpc) == 0) {
+ //user chose to die of boredom
+ max_keys_per_crypt = KEYS_PER_CRYPT;
+ create_clobj(KEYS_PER_CRYPT);
+ find_best_kpc();
+ } else {
+ max_keys_per_crypt = atoi(kpc);
+ create_clobj(max_keys_per_crypt);
+ }
+ }
+ printf("Local work size (LWS) %d, Keys per crypt (KPC) %Zd\n",
+ (int) local_work_size, max_keys_per_crypt);
+ pFmt->params.max_keys_per_crypt = max_keys_per_crypt;
+}
+
+//Check if the ciphertext if a valid SHA-512 crypt
+static int valid(char *ciphertext, struct fmt_main *pFmt) {
+ uint32_t i, j;
+ int len = strlen(ciphertext);
+
+ if (strncmp(ciphertext, "$6$", 3) != 0)
+ return 0;
+ char *p = strrchr(ciphertext, '$');
+ if (p == NULL)
+ return 0;
+ for (i = p - ciphertext + 1; i < len; i++) {
+ int found = 0;
+ for (j = 0; j < 64; j++)
+ if (itoa64[j] == ARCH_INDEX(ciphertext[i]))
+ found = 1;
+ if (found == 0) {
+ puts("not found");
+ return 0;
+ }
+ }
+ if (len - (p - ciphertext + 1) != 86)
+ return 0;
+ return 1;
+}
+
+//Deals with salt information
+static void *get_salt(char *ciphertext) {
+ int end = 0, i, len = strlen(ciphertext);
+ for (i = len - 1; i >= 0; i--)
+ if (ciphertext[i] == '$') {
+ end = i;
+ break;
+ }
+
+ static unsigned char ret[50];
+ for (i = 0; i < end; i++)
+ ret[i] = ciphertext[i];
+ ret[end] = 0;
+ return (void *) ret;
+}
+
+static void set_salt(void *salt) {
+ unsigned char *s = salt;
+ int len = strlen(salt);
+ static char currentsalt[64];
+ memcpy(currentsalt, s, len + 1);
+ unsigned char offset = 0;
+ salt_data.rounds = ROUNDS_DEFAULT;
+
+ if (strncmp((char *) "$6$", (char *) currentsalt, 3) == 0)
+ offset += 3;
+
+ if (strncmp((char *) currentsalt + offset, (char *) "rounds=", 7) == 0) {
+ const char *num = currentsalt + offset + 7;
+ char *endp;
+ unsigned long int srounds = strtoul(num, &endp, 10);
+
+ if (*endp == '$') {
+ endp += 1;
+ salt_data.rounds =
+ MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+ }
+ offset = endp - currentsalt;
+ }
+ memcpy(salt_data.salt, currentsalt + offset, 16);
+ salt_data.saltlen = strlen((char *) salt_data.salt);
+}
+
+//To binary functions
+///TODO: check which is the best aproach, this ou cryptsha512.c
+static int findb64(char c) {
+ int ret = ARCH_INDEX(atoi64[(uint8_t) c]);
+ return ret != 0x7f ? ret : 0;
+}
+
+static void magic(char *crypt, unsigned char *alt) {
+#define _24bit_from_b64(I,B2,B1,B0) \
+ {\
+ unsigned char c1=findb64(crypt[I+0]);\
+ unsigned char c2=findb64(crypt[I+1]);\
+ unsigned char c3=findb64(crypt[I+2]);\
+ unsigned char c4=findb64(crypt[I+3]);\
+ unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+ unsigned char b2=w&0xff;w>>=8;\
+ unsigned char b1=w&0xff;w>>=8;\
+ unsigned char b0=w&0xff;w>>=8;\
+ alt[B2]=b0;\
+ alt[B1]=b1;\
+ alt[B0]=b2;\
+ }
+ _24bit_from_b64(0, 0, 21, 42);
+ _24bit_from_b64(4, 22, 43, 1);
+ _24bit_from_b64(8, 44, 2, 23);
+ _24bit_from_b64(12, 3, 24, 45);
+ _24bit_from_b64(16, 25, 46, 4);
+ _24bit_from_b64(20, 47, 5, 26);
+ _24bit_from_b64(24, 6, 27, 48);
+ _24bit_from_b64(28, 28, 49, 7);
+ _24bit_from_b64(32, 50, 8, 29);
+ _24bit_from_b64(36, 9, 30, 51);
+ _24bit_from_b64(40, 31, 52, 10);
+ _24bit_from_b64(44, 53, 11, 32);
+ _24bit_from_b64(48, 12, 33, 54);
+ _24bit_from_b64(52, 34, 55, 13);
+ _24bit_from_b64(56, 56, 14, 35);
+ _24bit_from_b64(60, 15, 36, 57);
+ _24bit_from_b64(64, 37, 58, 16);
+ _24bit_from_b64(68, 59, 17, 38);
+ _24bit_from_b64(72, 18, 39, 60);
+ _24bit_from_b64(76, 40, 61, 19);
+ _24bit_from_b64(80, 62, 20, 41);
+
+ uint32_t w = findb64(crypt[85]) << 6 | findb64(crypt[84]) << 0;
+ alt[63] = (w & 0xff);
+}
+
+static void * get_binary(char *ciphertext) {
+ static unsigned char b[BINARY_SIZE];
+ memset(b, 0, BINARY_SIZE);
+ char *p = strrchr(ciphertext, '$');
+
+ if (p != NULL)
+ magic(p + 1, b);
+ return (void *) b;
+}
+
+//Compare functins
+static int cmp_all(void *binary, int count) {
+ uint32_t i;
+ uint64_t b = ((uint64_t *) binary)[0];
+
+ for (i = 0; i < count; i++)
+ if (b == out_hashes[i].v[0])
+ return 1;
+ return 0;
+}
+
+static int cmp_one(void *binary, int index) { //TODO: Check Samuele work.
+ int i;
+ uint64_t *t = (uint64_t *) binary;
+
+ for (i = 0; i < 8; i++) {
+ if (t[i] != out_hashes[index].v[i])
+ return 0;
+ }
+ return 1;
+}
+
+static int cmp_exact(char *source, int count) { //TODO: Check Samuele work.
+ return 1;
+}
+
+//Crypt function
+static void crypt_all(int count) {
+ //Send data to the dispositive
+ HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], salt_info, CL_FALSE, 0,
+ sizeof (crypt_sha512_salt), &salt_data, 0, NULL, NULL),
+ "failed in clEnqueueWriteBuffer data_info");
+ HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_in, CL_FALSE, 0,
+ sizeof(crypt_sha512_password) * max_keys_per_crypt, plaintext, 0, NULL, NULL),
+ "failed in clEnqueueWriteBuffer buffer_in");
+
+ //Enqueue the kernel
+ HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL,
+ &max_keys_per_crypt, &local_work_size, 0, NULL, NULL),
+ "failed in clEnqueueNDRangeKernel");
+
+ //Read back hashes
+ HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_out, CL_FALSE, 0,
+ sizeof (crypt_sha512_hash) * max_keys_per_crypt, out_hashes, 0, NULL, NULL),
+ "failed in reading data back");
+
+ //Do the work
+ HANDLE_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");
+}
+
+//Binary Hash functions group.
+static int binary_hash_0(void * binary) {
+ uint32_t *bin=binary;
+ printf("bin ");
+ int i;
+ for(i=0;i<8;i++)
+ printf("%08x ",bin[i]);
+ puts(" (Correct)");
+ return ((ARCH_WORD_32 *) binary)[0] & 0xF; }
+static int binary_hash_1(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFF; }
+static int binary_hash_2(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFF; }
+static int binary_hash_3(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFF; }
+static int binary_hash_4(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFFF; }
+static int binary_hash_5(void * binary) { return *(ARCH_WORD_32 *) binary & 0xFFFFFF; }
+static int binary_hash_6(void * binary) { return *(ARCH_WORD_32 *) binary & 0x7FFFFFF; }
+
+//Get Hash functions group.
+static int get_hash_0(int index) {
+ printf("\nget ");
+ int i;
+ for(i=0;i<8;i++)
+ printf("%08x ",out_hashes[index].v[i]);
+ puts(" ");
+
+ return out_hashes[index].v[0] & 0xF; }
+static int get_hash_1(int index) { return out_hashes[index].v[0] & 0xFF; }
+static int get_hash_2(int index) { return out_hashes[index].v[0] & 0xFFF; }
+static int get_hash_3(int index) { return out_hashes[index].v[0] & 0xFFFF; }
+static int get_hash_4(int index) { return out_hashes[index].v[0] & 0xFFFFF; }
+static int get_hash_5(int index) { return out_hashes[index].v[0] & 0xFFFFFF; }
+static int get_hash_6(int index) { return out_hashes[index].v[0] & 0x7FFFFFF; }
+
+//Format structure
+struct fmt_main fmt_opencl_cryptsha512 = {
+ {
+ FORMAT_LABEL,
+ FORMAT_NAME,
+ ALGORITHM_NAME,
+ BENCHMARK_COMMENT,
+ BENCHMARK_LENGTH,
+ PLAINTEXT_LENGTH,
+ BINARY_SIZE,
+ SALT_SIZE,
+ MIN_KEYS_PER_CRYPT,
+ MAX_KEYS_PER_CRYPT,
+ FMT_CASE | FMT_8_BIT,
+ tests
+ },
+ {
+ init,
+ fmt_default_prepare,
+ valid,
+ fmt_default_split,
+ get_binary,
+ get_salt,
+ {
+ binary_hash_0,
+ binary_hash_1,
+ binary_hash_2,
+ binary_hash_3,
+ binary_hash_4,
+ binary_hash_5,
+ binary_hash_6
+ },
+ fmt_default_salt_hash,
+ set_salt,
+ set_key,
+ get_key,
+ fmt_default_clear_keys,
+ crypt_all,
+ {
+ get_hash_0,
+ get_hash_1,
+ get_hash_2,
+ get_hash_3,
+ get_hash_4,
+ get_hash_5,
+ get_hash_6
+ },
+ cmp_all,
+ cmp_one,
+ cmp_exact
+ }
+};
Powered by blists - more mailing lists
Powered by Openwall GNU/*/Linux -
Powered by OpenVZ