# -*- coding: utf-8 -*- # Simple rainbow tables, experiment / toy example # We will precompute attack: "word1 word2 word3" where words are # picked from a list of 256 words. 2^24 candidates is a toy example. # That's too much for toy example. Let's use 2 words. # Copyright © 2015 Aleksey Cherepanov # # Redistribution and use in source and binary forms, with or without # modification, are permitted. # Tunable parameters word_count = 3 words = ["a" + str(i) for i in range(256)] chain_length = 200 color_count = 200 def hash_it(w): return hashlib.md5(w).digest() # Code starts here import hashlib import sys import itertools import time total_count = len(words) ** word_count def number_to_candidate(number): ws = [] l = len(words) for i in range(word_count): ws.append(words[number % l]) number /= l return " ".join(ws) def hash_to_number(h, position_in_chain): k = 256 r = ord(h[0]) i = 1 while k < total_count: k <<= 8 r <<= 8 r += ord(h[i]) i += 1 r += position_in_chain % color_count r %= total_count return r def generate(): start = time.time() tried = [False for i in range(total_count)] chains = [] print >> sys.stderr, "Generation: total count = {0}, chain_length = {1}, color_count = {2}".format(total_count, chain_length, color_count) k = 0 # for i in range(total_count): i = 0 while i < total_count: if k & 0xFFF == 0: t = tried.count(True) l = len(chains) tt = 0 ef = 0 avg_len = 0 if l > 0: tt = float(t) / l ef = float(l) / i avg_len = float(sum(c[2] for c in chains)) / l print >> sys.stderr, "i == {0}; l(c) == {1}, tried {2} {3:.3f}, t/c {4:.3f}, avg {5:.3f}".format(k, l, t, float(t) / total_count, tt, avg_len) if not tried[i]: ni = i for n in range(chain_length): w = number_to_candidate(ni) tried[ni] = True h = hash_it(w) ni = hash_to_number(h, n) # Save chain: # i - initial number, # h - final hash, # n - length of chain. chains.append((i, h, n + 1)) if len(chains) > total_count: print >> sys.stderr, "too many chains" break i += 1 k += 1 end = time.time() print >> sys.stderr, "chains", len(chains) print >> sys.stderr, "efficiency {0:.3f}".format(float(len(chains)) / total_count) # print >> sys.stderr, [c[2] for c in chains[0:10]] print >> sys.stderr, "generated in ", end - start return chains def prepare(chains): # We repack chains to be easy for lookups. r = {} for c in chains: i, h, n = c if h not in r: r[h] = [] r[h].append((i, n)) return r def crack(h, chains_hash): orig_h = h for offset in range(color_count): h = orig_h for i in range(offset, chain_length): if h in chains_hash: # Possible chains found. for chain in chains_hash[h]: ch = h cn = chain[0] for t in range(chain[1]): cw = number_to_candidate(cn) ch = hash_it(cw) if ch == orig_h: # We found the password! return cw cn = hash_to_number(ch, t) ni = hash_to_number(h, i) w = number_to_candidate(ni) h = hash_it(w) return None def check_good(chains_hash): k = 0 for ws in itertools.product(*([words] * word_count)): w = " ".join(ws) h = hash_it(w) r = crack(h, chains_hash) if r != w: print >> sys.stderr, "check failed on '{0}' with results '{1}', hash is {2}".format(w, r, h.encode('hex')) return else: k += 1 print >> sys.stderr, "all good, checked", k chains = generate() chains_hash = prepare(chains) # check_good(chains_hash) # print >> sys.stderr, crack(hash_it('asdf'), chains_hash) test_count = 100 test1 = [hash_it(str(i)) for i in range(test_count)] test2 = [hash_it(number_to_candidate(i)) for i in range(test_count)] test = test1 + test2 start = time.time() for h in test: # %% count good and bad crack(h, chains_hash) end = time.time() print >> sys.stderr, "end: count {0}, time {1} s, speed {2:.2f} c/s".format(len(test), end - start, float(len(test)) / (end - start)) start = time.time() for i in range(total_count): w = number_to_candidate(i) hash_it(w) # cmp_all() should be here end = time.time() print >> sys.stderr, "brute: time {0} s, speed {1:.2f} c/s".format(end - start, float(total_count) / (end - start))