Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Thu, 3 Sep 2015 03:34:54 +0300
From: Aleksey Cherepanov <lyosha@...nwall.com>
To: john-dev@...ts.openwall.com
Subject: reverse of full sha1 and sha256 limb when hash and block are known

tl;dr: if W and hash are known, initial state can be computed. ... PROFIT!

Looking at sha1 round, it is obvious that each round can be computed
backwards (i.e. starting from hash and going to initial state):
consider a, b, c, d, e after round known, then there are only 2
unknown variables: w[i] and e at the beginning of round. So knowing
w[i], it is trivial to right a formula to compute e. This way we go
reverse.

sha256 is a bit more complex, but it has only 1 unknown variable too.
So it is possible to write reverse algo straight-forward. PoC is
attached.

So sha1 and sha256 behave like block ciphers: you "encrypt" initial
state ("block of data") with W, that is expanded block of message
("key"). And it is possible to "decrypt" hash to get initial state.


There are several applications. The following 2 were pointed out to me
by Alexander Cherepanov, there are my ideas then.

1) To check 1 password against 1 hash, it is possible to meet in the
   middle computing halves in parallel. First of all, it is
   interleaving, but it is possible to vectorize shared part of
   regular and reverse algorithms. It is possible to speed up
   defensive usage this way.

2) To check 1 checksum against 1 big file, it is possible to compute
   checksum using 2 threads. (But usually(really?) checksums are
   checked for multiple files at the same time, thus multithreading is
   already there.)

3) Passwords longer than 1 block with repeating tails and same length
   have trade off: 1 computation for each hash for each tail or 1 full
   computation for each passwords. Lengths 56-64 (inclusive) need 2
   limbs but second block does not change at all (1 value for each
   length).

4) It is possible to hash beginning and tail independently if memory
   permits: compute initial states for each tail for each hash, put
   all of them into bitmask (lookup table), compute beginnings and
   check against the bitmask. (Is not it called "meet in the middle"
   attack?)


Easy practical application

Consider a hash sha256(sha256(...).sha256(...)), for instance
sha256(sha256($p).sha256($s))

sha256($p).sha256($s) produces exactly 1 block of message, so the
second block is 0x80, padding and constant length always. So we can
reverse the second block and check intermediate state computing only 1
limb instead of 2. That's up to 50% higher speed (considering that we
already have other optimizations including caching for sha256($p) and
sha256($s)). With all optimizations and many salts, it should have
same c/s as single block raw-sha256.


I only checked sha1 and sha256. But (many?) other hash functions can
have this behaviour. Also other crypto schemes can give constant
blocks. It is needed to investigate it more.

Thanks!

-- 
Regards,
Aleksey Cherepanov

/* Proof of concept backward (from hash to state) evaluation of sha-256 */

/* $ gcc backward.c && time ./a.out */

/*
 * Copyright  2015 Aleksey Cherepanov <lyosha@...nwall.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 */

#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
#pragma GCC optimize 3

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#define uc unsigned char
/* %% ui is not convenient, it causes many typos. */
#define ui unsigned int

#define bits 32

#define debug 0

#define rol(a, b) (((a) >> (bits - (b))) | ((a) << (b)))
#define ror(a, b) (((a) << (bits - (b))) | ((a) >> (b)))
#define bswap __builtin_bswap32

static const ui H[] = {
    0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU, 0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U
};

static const ui k[] = {
    0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U, 0x3956c25bU, 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U,
    0xd807aa98U, 0x12835b01U, 0x243185beU, 0x550c7dc3U, 0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, 0xc19bf174U,
    0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU, 0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU,
    0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U, 0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U,
    0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U, 0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U,
    0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U, 0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U,
    0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U, 0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U,
    0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U, 0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U
};

int main(void)
{
    assert(sizeof(ui) == 4);

    const uc password[] = "asdf";
    /* $ printf asdf | sha256sum - | perl -pe 's! .*!!; s/../\\x$&/g' */
    const uc hash[] = "\xf0\xe4\xc2\xf7\x6c\x58\x91\x6e\xc2\x58\xf2\x46\x85\x1b\xea\x09\x1d\x14\xd4\x24\x7a\x2f\xc3\xe1\x86\x94\x46\x1b\x18\x16\xe1\x3b";

    size_t block_size = 16 * 4;
    uc block[block_size];
    memset(block, 0, sizeof(block));

    size_t len = sizeof(password) - 1;
    memcpy(block, password, len);
    block[len] = 0x80;
    /* We use 1 byte. So length is very limited. */
    /* %% do that in W. */
    /* It is already byte swapped. */
    block[block_size - 4] = len << 3;

    int i;

    /* Expander goes forward as usual */
    ui w[64];
    memcpy(w, block, sizeof(block));
    for (i = 0; i < 14; i++) {
        w[i] = bswap(w[i]);
    }
    for (i = 16; i < 64; i++) {
        ui s0 = ror(w[i - 15], 7) ^ ror(w[i - 15], 18) ^ (w[i - 15] >> 3);
        ui s1 = ror(w[i - 2], 17) ^ ror(w[i - 2], 19) ^ (w[i - 2] >> 10);
        w[i] = w[i - 16] + s0 + w[i - 7] + s1;
    }

    ui r[8];

#define r1 (r[0])
#define r2 (r[1])
#define r3 (r[2])
#define r4 (r[3])
#define r5 (r[4])
#define r6 (r[5])
#define r7 (r[6])
#define r8 (r[7])

#define a r1
#define b r2
#define c r3
#define d r4
#define e r5
#define f r6
#define g r7
#define h r8


#if debug
    /* Forward computation for debugging */
    memcpy(r, H, sizeof(r));
    for (i = 0; i < 64; i++) {
        ui s0 = ror(a, 2) ^ ror(a, 13) ^ ror(a, 22);
        ui maj = (a & b) ^ (a & c) ^ (b & c);
        ui t2 = s0 + maj;
        ui s1 = ror(e, 6) ^ ror(e, 11) ^ ror(e, 25);
        ui ch = (e & f) ^ (~e & g);
        ui t1 = h + s1 + ch + k[i] + w[i];
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
        printf("%2d: %08x %08x %08x %08x %08x %08x %08x %08x  k %08x w %08x t1 %08x t2 %08x\n", i, a, b, c, d, e, f, g, h, k[i], w[i], t1, t2);
        printf(" s1 %08x  ch %08x\n", s1, ch);
    }
#endif



    /* Backward computations */

    memcpy(r, hash, sizeof(r));

    for (i = 0; i < 8; i++) {
        r[i] = bswap(r[i]);
        r[i] -= H[i];
    }

    for (i = 63; i >= 0; i--) {

/* letter at beginning of round, "_", r* in final state */
#define a_r2 r2
#define b_r3 r3
#define c_r4 r4

        ui t2 = (ror(a_r2, 2) ^ ror(a_r2, 13) ^ ror(a_r2, 22)) + ((a_r2 & b_r3) ^ (a_r2 & c_r4) ^ (b_r3 & c_r4));
        ui t1 = a - t2;

        /* "Rotate" array of state variables. */

        /* Forward: b = a */
        a = b;
        /* Forward: c = b */
        b = c;
        /* Forward: d = c */
        c = d;
        /* Forward: e = d + t1 */
        d = e - t1;
        /* Forward: f = e */
        e = f;
        /* Forward: g = f */
        f = g;
        /* Forward: h = g */
        g = h;

        /* Forward: t1 = h + s1 + ch + k[i] + w[i] */
        /* Backward: h = t1 - s1 - ch - k[i] - w[i] */
        /*        or h = t1 - (s1 + ch + k[i] + w[i]) */
        /*        (bigger shared part ^) */
        ui s1 = ror(e, 6) ^ ror(e, 11) ^ ror(e, 25);
        ui ch = (e & f) ^ (~e & g);
        h = t1 - s1 - ch - k[i] - w[i];

    }

    if (!memcmp(H, r, sizeof(r))) {
        printf("good!\n");
    } else {
        printf("failure\n");
        for (i = 0; i < 8; i++) {
            printf("%2d: H %08x  r %08x%s\n", i, H[i], r[i], H[i] == r[i] ? "" : "  <=== differ");
        }
    }

    return 0;
}

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ