# -*- coding: utf-8 -*-
# Simple rainbow tables, experiment / toy example

# We will precompute attack: "word1 word2 word3" where words are
# picked from a list of 256 words. 2^24 candidates is a toy example.
# That's too much for toy example. Let's use 2 words.

# Copyright © 2015 Aleksey Cherepanov <lyosha@openwall.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.


# Tunable parameters

word_count = 3
words = ["a" + str(i) for i in range(256)]

chain_length = 200

color_count = 200

def hash_it(w):
    return hashlib.md5(w).digest()

# Code starts here

import hashlib
import sys
import itertools
import time

total_count = len(words) ** word_count

def number_to_candidate(number):
    ws = []
    l = len(words)
    for i in range(word_count):
        ws.append(words[number % l])
        number /= l
    return " ".join(ws)

def hash_to_number(h, position_in_chain):
    k = 256
    r = ord(h[0])
    i = 1
    while k < total_count:
        k <<= 8
        r <<= 8
        r += ord(h[i])
        i += 1
    r += position_in_chain % color_count
    r %= total_count
    return r

def generate():
    start = time.time()
    tried = [False for i in range(total_count)]
    chains = []
    print >> sys.stderr, "Generation: total count = {0}, chain_length = {1}, color_count = {2}".format(total_count, chain_length, color_count)
    k = 0
    # for i in range(total_count):
    i = 0
    while i < total_count:
        if k & 0xFFF == 0:
            t = tried.count(True)
            l = len(chains)
            tt = 0
            ef = 0
            avg_len = 0
            if l > 0:
                tt = float(t) / l
                ef = float(l) / i
                avg_len = float(sum(c[2] for c in chains)) / l
            print >> sys.stderr, "i == {0}; l(c) == {1}, tried {2} {3:.3f}, t/c {4:.3f}, avg {5:.3f}".format(k, l, t, float(t) / total_count, tt, avg_len)
        if not tried[i]:
            ni = i
            for n in range(chain_length):
                w = number_to_candidate(ni)
                tried[ni] = True
                h = hash_it(w)
                ni = hash_to_number(h, n)
            # Save chain:
            # i - initial number,
            # h - final hash,
            # n - length of chain.
            chains.append((i, h, n + 1))
        if len(chains) > total_count:
            print >> sys.stderr, "too many chains"
            break
        i += 1
        k += 1
    end = time.time()
    print >> sys.stderr, "chains", len(chains)
    print >> sys.stderr, "efficiency {0:.3f}".format(float(len(chains)) / total_count)
    # print >> sys.stderr, [c[2] for c in chains[0:10]]
    print >> sys.stderr, "generated in ", end - start
    return chains

def prepare(chains):
    # We repack chains to be easy for lookups.
    r = {}
    for c in chains:
        i, h, n = c
        if h not in r:
            r[h] = []
        r[h].append((i, n))
    return r

def crack(h, chains_hash):
    orig_h = h
    for offset in range(color_count):
        h = orig_h
        for i in range(offset, chain_length):
            if h in chains_hash:
                # Possible chains found.
                for chain in chains_hash[h]:
                    ch = h
                    cn = chain[0]
                    for t in range(chain[1]):
                        cw = number_to_candidate(cn)
                        ch = hash_it(cw)
                        if ch == orig_h:
                            # We found the password!
                            return cw
                        cn = hash_to_number(ch, t)
            ni = hash_to_number(h, i)
            w = number_to_candidate(ni)
            h = hash_it(w)
    return None

def check_good(chains_hash):
    k = 0
    for ws in itertools.product(*([words] * word_count)):
        w = " ".join(ws)
        h = hash_it(w)
        r = crack(h, chains_hash)
        if r != w:
            print >> sys.stderr, "check failed on '{0}' with results '{1}', hash is {2}".format(w, r, h.encode('hex'))
            return
        else:
            k += 1
    print >> sys.stderr, "all good, checked", k


chains = generate()

chains_hash = prepare(chains)

# check_good(chains_hash)

# print >> sys.stderr, crack(hash_it('asdf'), chains_hash)

test_count = 100
test1 = [hash_it(str(i)) for i in range(test_count)]
test2 = [hash_it(number_to_candidate(i)) for i in range(test_count)]
test = test1 + test2
start = time.time()
for h in test:
    # %% count good and bad
    crack(h, chains_hash)
end = time.time()
print >> sys.stderr, "end: count {0}, time {1} s, speed {2:.2f} c/s".format(len(test), end - start, float(len(test)) / (end - start))

start = time.time()
for i in range(total_count):
    w = number_to_candidate(i)
    hash_it(w)
    # cmp_all() should be here
end = time.time()
print >> sys.stderr, "brute: time {0} s, speed {1:.2f} c/s".format(end - start, float(total_count) / (end - start))