commit 626ed7acd96054cd5141563b80cc3a9be66f15f5 Author: magnum Date: Mon Nov 25 08:31:10 2013 +0100 Optimize for aligned destination diff --git a/src/opencl/cryptmd5_kernel.cl b/src/opencl/cryptmd5_kernel.cl index f90555c..73ab99f 100644 --- a/src/opencl/cryptmd5_kernel.cl +++ b/src/opencl/cryptmd5_kernel.cl @@ -98,12 +98,20 @@ typedef struct { __constant uchar cl_md5_salt_prefix[] = "$1$"; __constant uchar cl_apr1_salt_prefix[] = "$apr1$"; -inline void buf_update(md5_ctx * ctx, uchar * string, uint len, uint buflen) +inline void buf_update(md5_ctx * ctx, uchar * string, uint buflen) { uint i; - for (i = 0; i < len; i++) - PUTCHAR(ctx->buffer, buflen + i, string[i]); + if (buflen & 3) { + // 8-bit copy + for (i = 0; i < 16; i++) + PUTCHAR(ctx->buffer, buflen + i, string[i]); + } else { + // 32-bit copy + buflen >>= 2; + for (i = 0; i < 4; i++) + ctx->buffer[buflen + i] = ((uint*)string)[i]; + } } inline void ctx_update(md5_ctx * ctx, uchar * string, uint len, @@ -356,7 +364,7 @@ __kernel void cryptmd5(__global const crypt_md5_password * inbuffer, nid = g[(i + 1) % 42]; // next ctx id to process, 0-7 ctx_out = &((uchar *) ctxs[nid].buffer)[altpos[nid]]; md5_digest(&ctxs[cid], alt_result, &ctxs_buflen[cid]); - buf_update(&ctxs[nid], (uchar *) alt_result, 16, altpos[nid]); + buf_update(&ctxs[nid], (uchar *) alt_result, altpos[nid]); cid = nid; } for (i = 0; i < 4; i++)