diff --git a/src/opencl_parallel_fmt_plug.c b/src/opencl_parallel_fmt_plug.c index a184c38..f6e3dd7 100644 --- a/src/opencl_parallel_fmt_plug.c +++ b/src/opencl_parallel_fmt_plug.c @@ -56,8 +56,8 @@ john_register_one(&fmt_opencl_parallel); #include "memdbg.h" static const char *warn[] = { - "xfer salt1: ", ", xfer salt2: ", ", xfer keys: ", ", xfer idx: ", - ", crypt: ", ", xfer: " + "xfer salt: ", ", xfer keys: ", ", xfer idx: ", ", init: ", + ", loop: ", ", loop finish: ", ", xfer results: " }; #define MIN(a, b) (((a) > (b)) ? (b) : (a)) @@ -97,7 +97,7 @@ uint64_t sequentialLoops; static int source_in_use; -static int split_events[] = { 2, -1, -1 }; +static int split_events[] = { 4, 5, -1 }; static void *get_salt(char *ciphertext); static int crypt_all(int *pcount, struct db_salt *salt); @@ -342,12 +342,12 @@ static void init(struct fmt_main *self) //Initialize openCL tuning (library) for this format. - opencl_init_auto_setup(SEED, 0, NULL, + opencl_init_auto_setup(SEED, 3*5*128*1, split_events, warn, 4, self, create_clobj, release_clobj, BINARY_SIZE*3, 0); //Auto tune execution from shared/included code. self->methods.crypt_all = crypt_all_benchmark; - autotune_run(self, 1000, 0, 1000); + autotune_run(self, 3*5*128*1, 0, 1000); self->methods.crypt_all = crypt_all; } @@ -550,7 +550,7 @@ static int crypt_all(int *pcount, struct db_salt *salt) if (idx_offset > 4 * (global_work_size + 1)) idx_offset = 0; - printf("crypt_all\n"); + printf("crypt_all loops %zu\n", (size_t)sequentialLoops); HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_salt, CL_FALSE, 0, sizeof(struct parallel_salt), saved_salt, 0, NULL, multi_profilingEvent[0]), "Failed transferring salt"); @@ -573,15 +573,15 @@ static int crypt_all(int *pcount, struct db_salt *salt) multi_profilingEvent[3]), "failed in clEnqueueNDRangeKernel"); - for(i=0;i 4 * (global_work_size + 1)) idx_offset = 0; - printf("crypt_all_benchmark\n"); - - BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_salt, + printf("crypt_all_bench loops %zu\n", (size_t)sequentialLoops); + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_salt, CL_FALSE, 0, sizeof(struct parallel_salt), saved_salt, 0, NULL, multi_profilingEvent[0]), "Failed transferring salt"); @@ -636,25 +627,37 @@ static int crypt_all_benchmark(int *pcount, struct db_salt *salt) key_idx - key_offset, saved_key + key_offset, 0, NULL, multi_profilingEvent[1]), "Failed transferring keys"); - BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_idx, + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_idx, CL_FALSE, idx_offset, sizeof(cl_uint) * (global_work_size + 1) - idx_offset, saved_idx + (idx_offset / sizeof(cl_uint)), 0, NULL, multi_profilingEvent[2]), "Failed transferring index"); - BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel_loop, 1, + HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel_init, 1, NULL, &global_work_size, lws, 0, NULL, - multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel crypt_kernel_loop"); + multi_profilingEvent[3]), "failed in clEnqueueNDRangeKernel"); + for(i=0;i