# -*- coding: utf-8 -*- # Simple rainbow tables, experiment / toy example # We will precompute attack: "word1 word2 word3" where words are # picked from a list of 256 words. 2^24 candidates is a toy example. # That's too much for toy example. Let's use 2 words. # Copyright © 2015 Aleksey Cherepanov # # Redistribution and use in source and binary forms, with or without # modification, are permitted. # Tunable parameters word_count = 2 words = ["a" + str(i) for i in range(256)] chain_length = 20 # Code starts here import hashlib import sys import itertools import time total_count = len(words) ** word_count def number_to_candidate(number): ws = [] l = len(words) for i in range(word_count): ws.append(words[number % l]) number /= l return " ".join(ws) def hash_to_number(h): k = 256 # h = h[5:] r = ord(h[0]) i = 1 while k < total_count: k <<= 8 r <<= 8 r += ord(h[i]) i += 1 r %= total_count return r def generate(): start = time.time() tried = [False for i in range(total_count)] chains = [] print >> sys.stderr, "Generation: total count = {0}, chain_length = {1}".format(total_count, chain_length) k = 0 # for i in range(total_count): i = 0 while i < total_count: if k & 0xFFF == 0: t = tried.count(True) l = len(chains) tt = 0 ef = 0 avg_len = 0 if l > 0: tt = float(t) / l ef = float(l) / i avg_len = float(sum(c[2] for c in chains)) / l print >> sys.stderr, "i == {0}; l(c) == {1}, tried {2} {3:.3f}, t/c {4:.3f}, avg {5:.3f}".format(k, l, t, float(t) / total_count, tt, avg_len) if not tried[i]: ni = i for n in range(chain_length): w = number_to_candidate(ni) tried[ni] = True h = hashlib.md5(w).digest() ni = hash_to_number(h) # If new candidate is already tried then we stop and # save short chain. if tried[ni]: # if ni in [tc[0] for tc in chains]: # print >> sys.stderr, "hi there" break # Save chain: # i - initial number, # h - final hash, # n - length of chain. chains.append((i, h, n + 1)) if len(chains) > total_count: print >> sys.stderr, "too many chains" break i += 1 k += 1 end = time.time() print >> sys.stderr, "chains", len(chains) print >> sys.stderr, "efficiency {0:.3f}".format(float(len(chains)) / total_count) print >> sys.stderr, [c[2] for c in chains[0:10]] print >> sys.stderr, "generated in ", end - start return chains def prepare(chains): # We repack chains to be easily for lookups. r = {} for c in chains: i, h, n = c r[h] = (i, n) return r def crack(h, chains_hash): orig_h = h for i in range(chain_length): if h in chains_hash and chains_hash[h][1] > i: # Possible chain found. ch = h cn = chains_hash[h][0] for t in range(chains_hash[h][1]): cw = number_to_candidate(cn) ch = hashlib.md5(cw).digest() if ch == orig_h: # We found the password! return cw cn = hash_to_number(ch) ni = hash_to_number(h) w = number_to_candidate(ni) h = hashlib.md5(w).digest() return None def check_good(chains_hash): k = 0 for ws in itertools.product(*([words] * word_count)): w = " ".join(ws) h = hashlib.md5(w).digest() r = crack(h, chains_hash) if r != w: print >> sys.stderr, "check failed on '{0}' with results '{1}', hash is {2}".format(w, r, h.encode('hex')) return else: k += 1 print >> sys.stderr, "all good, checked", k chains = generate() chains_hash = prepare(chains) check_good(chains_hash) # print >> sys.stderr, crack(hashlib.md5('asdf').digest(), chains_hash) test_count = 1000 test1 = [hashlib.md5(str(i)).digest() for i in range(test_count)] test2 = [hashlib.md5(number_to_candidate(i)).digest() for i in range(test_count)] test = test1 + test2 start = time.time() for h in test: # %% count good and bad crack(h, chains_hash) end = time.time() print >> sys.stderr, "end: count {0}, time {1} s, speed {2:.2f} c/s".format(len(test), end - start, float(len(test)) / (end - start)) start = time.time() for i in range(total_count): w = number_to_candidate(i) hashlib.md5(w).digest() # cmp_all() should be here end = time.time() print >> sys.stderr, "brute: time {0} s, speed {1:.2f} c/s".format(end - start, float(total_count) / (end - start))