>From 9c331e58a2e83f0a04ffd9d702add21a06042344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claudio=20Andr=C3=A9?= Date: Thu, 28 Jun 2012 21:28:17 -0300 Subject: [PATCH] Improvement: more aggressive defaults on Nvidia. --- src/opencl_cryptsha512_fmt.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/opencl_cryptsha512_fmt.c b/src/opencl_cryptsha512_fmt.c index 66ecb03..6ee1feb 100644 --- a/src/opencl_cryptsha512_fmt.c +++ b/src/opencl_cryptsha512_fmt.c @@ -85,7 +85,7 @@ unsigned int get_task_max_size(){ return max_available * KEYS_PER_CORE_GPU; } -size_t get_default_workgroup(){ +size_t get_safe_workgroup(){ if (cpu(device_info[gpu_id])) return 1; @@ -94,6 +94,18 @@ size_t get_default_workgroup(){ return 32; } +size_t get_default_workgroup(){ + unsigned int max_available; + max_available = get_task_max_work_group_size(); + + if (gpu_nvidia(device_info[gpu_id])) { + global_work_size = (global_work_size / max_available) * max_available; //Find a multiple. + return max_available; + + } else + return get_safe_workgroup(); +} + /* ------- Create and destroy necessary objects ------- */ static void create_clobj(int gws) { pinned_saved_keys = clCreateBuffer(context[gpu_id], @@ -287,7 +299,7 @@ static void find_best_workgroup(void) { plaintext, 0, NULL, NULL), "Failed in clEnqueueWriteBuffer II"); - my_work_group = get_default_workgroup(); + my_work_group = get_safe_workgroup(); // Find minimum time for (; (int) my_work_group <= (int) max_group_size; @@ -483,13 +495,13 @@ static void init(struct fmt_main *pFmt) { printf("Elapsed time: %lu seconds\n", runtime); fflush(stdout); - global_work_size = get_task_max_size(); - local_work_size = get_default_workgroup(); - // create kernel to execute crypt_kernel = clCreateKernel(program[gpu_id], "kernel_crypt", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); + global_work_size = get_task_max_size(); + local_work_size = get_default_workgroup(); + if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(tmp_value); -- 1.7.9.5