Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sat, 22 Aug 2015 10:42:42 +0200
From: Agnieszka Bielec <bielecagnieszka8@...il.com>
To: john-dev@...ts.openwall.com
Subject: Re: PHC: Argon2 on GPU

2015-08-22 3:48 GMT+02:00 Solar Designer <solar@...nwall.com>:
> On Fri, Aug 21, 2015 at 05:40:42PM +0200, Agnieszka Bielec wrote:
>> 2015-08-20 22:34 GMT+02:00 Solar Designer <solar@...nwall.com>:
>> > You could start by experimenting with a much simpler than Argon2 yet in
>> > some ways similar kernel: implement some trivial operation like XOR on
>> > different vector widths and see whether/how this changes the assembly.
>> > Then make it slightly less trivial (just enough to prevent the compiler
>> > from optimizing things out) and add uses of private or local memory,
>> > and see if you can make it run faster by using wider vectors per the
>> > same private or local memory usage.
>>
>> I tested (only 960m)
>> -copying memory from __private to __private
>> - from __global to __private
>> -xoring private tables with __prrivate tables
>>
>> using ulong, ulong2, ulong4, ulong8 (I was getting empty kernel using ulong16)
>
> Great.  Where is the OpenCL code for these?  I'd like to take a look at
> what exactly you were testing.

xoring:


#define MOD 4
#define type ulong4

void func(type * table)
{ //prohibits optimizing code
}

__kernel void pomelo_crypt_kernel(__global const uchar * in,
    __global const uint * index,
    __global char *out,
    __global struct pomelo_salt *salt, __global type *S)
{
    int i,j;
    uint gid;
    gid = get_global_id(0);
    S+=gid*1024/MOD;
    type copy1[1024/MOD];
    type copy2[1024/MOD];
    for(j=0;j<1024/MOD;j++)
            copy1[j]=S[1024/MOD-j];
    for(j=0;j<1024/MOD;j++)
            copy2[j]=S[j];
    for(i=0;i<1000;i++)
    {
        for(j=0;j<1024/MOD;j++)
            copy1[j]^=copy2[1024/MOD-j];
        func(copy1);
        for(j=0;j<1024/MOD;j++)
            copy1[j]^=copy2[j];
        func(copy1);
    }
    out[gid]=((ulong*)copy1)[0];
}

copying from global to private:
__kernel void pomelo_crypt_kernel(__global const uchar * in,
    __global const uint * index,
    __global char *out,
    __global struct pomelo_salt *salt, __global type *S)
{
    int i,j;
    uint gid;
    gid = get_global_id(0);
    S+=gid*1024/MOD;
    type copy1[1024/MOD];
    type copy2[1024/MOD];
    for(i=0;i<1000;i++)
    {
        for(j=0;j<1024/MOD;j++)
            copy1[j]=S[1024/MOD-j];
        func(copy1);
        for(j=0;j<1024/MOD;j++)
            copy1[j]=S[j];
        func(copy1);
    }
    out[gid]=((ulong*)copy1)[0];
}

copying from private to private. I didn't have defines yet:

__kernel void pomelo_crypt_kernel(__global const uchar * in,
    __global const uint * index,
    __global char *out,
    __global struct pomelo_salt *salt, __global ulong2 *S)
{
    int i,j;
    uint gid;
    gid = get_global_id(0);
    S+=gid*1024/2;
    ulong2 copy1[1024/2];
    ulong2 copy2[1024/2];
    for(i=0;i<1024/2;i++)
    {
        copy1[i]=S[i+0];
    }
    for(i=0;i<1000;i++)
    {
        for(j=0;j<1024/2;j++)
            copy2[j]=copy1[1024/2-j];
        func(copy2);
        for(j=0;j<1024/2;j++)
            copy1[j]=copy2[j];
        func(copy1);
    }
}

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.