# -*- coding: utf-8 -*-
# Simple rainbow tables, experiment / toy example

# We will precompute attack: "word1 word2 word3" where words are
# picked from a list of 256 words. 2^24 candidates is a toy example.
# That's too much for toy example. Let's use 2 words.

# Copyright © 2015 Aleksey Cherepanov <lyosha@openwall.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.


# Tunable parameters

word_count = 2
words = ["a" + str(i) for i in range(256)]

chain_length = 20

# Code starts here

import hashlib
import sys
import itertools
import time

total_count = len(words) ** word_count

def number_to_candidate(number):
    ws = []
    l = len(words)
    for i in range(word_count):
        ws.append(words[number % l])
        number /= l
    return " ".join(ws)

def hash_to_number(h):
    k = 256
    # h = h[5:]
    r = ord(h[0])
    i = 1
    while k < total_count:
        k <<= 8
        r <<= 8
        r += ord(h[i])
        i += 1
    r %= total_count
    return r

def generate():
    start = time.time()
    tried = [False for i in range(total_count)]
    chains = []
    print >> sys.stderr, "Generation: total count = {0}, chain_length = {1}".format(total_count, chain_length)
    k = 0
    # for i in range(total_count):
    i = 0
    while i < total_count:
        if k & 0xFFF == 0:
            t = tried.count(True)
            l = len(chains)
            tt = 0
            ef = 0
            avg_len = 0
            if l > 0:
                tt = float(t) / l
                ef = float(l) / i
                avg_len = float(sum(c[2] for c in chains)) / l
            print >> sys.stderr, "i == {0}; l(c) == {1}, tried {2} {3:.3f}, t/c {4:.3f}, avg {5:.3f}".format(k, l, t, float(t) / total_count, tt, avg_len)
        if not tried[i]:
            ni = i
            for n in range(chain_length):
                w = number_to_candidate(ni)
                tried[ni] = True
                h = hashlib.md5(w).digest()
                ni = hash_to_number(h)
                # If new candidate is already tried then we stop and
                # save short chain.
                if tried[ni]:
                    # if ni in [tc[0] for tc in chains]:
                    #     print >> sys.stderr, "hi there"
                    break
            # Save chain:
            # i - initial number,
            # h - final hash,
            # n - length of chain.
            chains.append((i, h, n + 1))
        if len(chains) > total_count:
            print >> sys.stderr, "too many chains"
            break
        i += 1
        k += 1
    end = time.time()
    print >> sys.stderr, "chains", len(chains)
    print >> sys.stderr, "efficiency {0:.3f}".format(float(len(chains)) / total_count)
    print >> sys.stderr, [c[2] for c in chains[0:10]]
    print >> sys.stderr, "generated in ", end - start
    return chains

def prepare(chains):
    # We repack chains to be easily for lookups.
    r = {}
    for c in chains:
        i, h, n = c
        r[h] = (i, n)
    return r

def crack(h, chains_hash):
    orig_h = h
    for i in range(chain_length):
        if h in chains_hash and chains_hash[h][1] > i:
            # Possible chain found.
            ch = h
            cn = chains_hash[h][0]
            for t in range(chains_hash[h][1]):
                cw = number_to_candidate(cn)
                ch = hashlib.md5(cw).digest()
                if ch == orig_h:
                    # We found the password!
                    return cw
                cn = hash_to_number(ch)
        ni = hash_to_number(h)
        w = number_to_candidate(ni)
        h = hashlib.md5(w).digest()
    return None

def check_good(chains_hash):
    k = 0
    for ws in itertools.product(*([words] * word_count)):
        w = " ".join(ws)
        h = hashlib.md5(w).digest()
        r = crack(h, chains_hash)
        if r != w:
            print >> sys.stderr, "check failed on '{0}' with results '{1}', hash is {2}".format(w, r, h.encode('hex'))
            return
        else:
            k += 1
    print >> sys.stderr, "all good, checked", k


chains = generate()

chains_hash = prepare(chains)

check_good(chains_hash)

# print >> sys.stderr, crack(hashlib.md5('asdf').digest(), chains_hash)

test_count = 1000
test1 = [hashlib.md5(str(i)).digest() for i in range(test_count)]
test2 = [hashlib.md5(number_to_candidate(i)).digest() for i in range(test_count)]
test = test1 + test2
start = time.time()
for h in test:
    # %% count good and bad
    crack(h, chains_hash)
end = time.time()
print >> sys.stderr, "end: count {0}, time {1} s, speed {2:.2f} c/s".format(len(test), end - start, float(len(test)) / (end - start))

start = time.time()
for i in range(total_count):
    w = number_to_candidate(i)
    hashlib.md5(w).digest()
    # cmp_all() should be here
end = time.time()
print >> sys.stderr, "brute: time {0} s, speed {1:.2f} c/s".format(end - start, float(total_count) / (end - start))