# -*- coding: utf-8 -*- # Experiment with function for fast reject # Copyright © 2015 Aleksey Cherepanov # # Redistribution and use in source and binary forms, with or without # modification, are permitted. # ###################################################################### # Tunable parameters # (Actually the whole algo may be tuned...) # table_bits == 4 allows sse's shuffle to perform dereference table_bits = 4 # for tests test_target_count = 2**5 # %% it should be auto-tunable table_count = 2 rand_value = lambda: random.randint(0, 255) # rand_value = lambda: random.randint(0, 1) # rand_value = lambda: random.randint(0, 7) # rand_value = lambda: random.randint(0, 3) # rand_value = lambda: random.randint(0, 2 ** 5 - 1) rand_value_for_init = lambda: random.randint(0, 255) mutations_for_each_table = 1 # ###################################################################### # Code import random import sys table_size = 2 ** table_bits table_max = table_size - 1 # return 0 on success and 1-255 in other cases def check(candidate, tables): r = 0 for c, t in zip(candidate, tables): r ^= t[c] # Having 0 for misses, it may be easier to build tables that don't # reject what they should not reject r = 1 if r == 0 else 0 return r def make_tables(target): tables = [[rand_value_for_init() for j in range(table_size)] for i in range(table_count)] ar, aw, rr, rw, total = measure_tables(tables, target) count_multiplexer = lambda rw, rr: rw * 1000000 - rr deep_copy = lambda t: map(list, t) rand_bit = lambda: 1 << random.randint(0, 7) rand_table = lambda: random.randint(0, table_count - 1) rand_index = lambda: random.randint(0, table_max) # hill climbing old_rw = rw old_aw = aw old_rr = rr old_tables = deep_copy(tables) while old_rw != 0 or old_aw > old_rr: # while old_rw != 0 or old_rr < total / 2: # reset tables tables = deep_copy(old_tables) # mutate tables # tables[rand_table()][rand_index()] = rand_value() # tables[0][rand_index()] ^= rand_bit() for i in range(table_count): for j in range(mutations_for_each_table): # tables[i][rand_index()] ^= rand_value() tables[i][rand_index()] = rand_value() # measure tables ar, aw, rr, rw, total = measure_tables(tables, target) # Did we improve tables? if count_multiplexer(rw, rr) < count_multiplexer(old_rw, old_rr): # we improved state, save it old_rw = rw old_rr = rr old_aw = aw old_tables = deep_copy(tables) print >> sys.stderr, rw, rr, aw, count_multiplexer(rw, rr) print >> sys.stderr, "finish", old_rr, old_rw return tables def measure_tables(tables, test_target): # test tables: we try all possible candidates test_candidate = [0] * table_count test_target_hash = {} for t in test_target: if t in test_target_hash: test_target_hash[t] += 1 else: test_target_hash[t] = 1 # We need a function that: # - never rejects elements of target set # - rarely accepts elements not from target set, but such are # permitted # So our target is accepted_right == number of keys in # test_target_hash, rejected_wrong == 0, high rejected_right and low # accepted_wrong accepted_wrong = 0 accepted_right = 0 rejected_wrong = 0 rejected_right = 0 total = 0 while True: tup = tuple(test_candidate) r = check(tup, tables) total += 1 should_accept = tup in test_target_hash if r == 0: if should_accept: accepted_right += 1 else: accepted_wrong += 1 else: if should_accept: rejected_wrong += 1 else: rejected_right += 1 # pick next candidate i = 0 test_candidate[i] += 1 while i < len(test_candidate) - 1 and test_candidate[i] == table_size: test_candidate[i] = 0 test_candidate[i + 1] += 1 i += 1 if i == len(test_candidate) - 1 and test_candidate[i] == table_size: # that's all break return [accepted_right, accepted_wrong, rejected_right, rejected_wrong, total] # ###################################################################### # Test run # We return tables_count random numbers with table_bits bits. Like # get_hash() but already split into packs suitable for indexing. def make_candidate(): return tuple(random.randint(0, table_max) for i in range(table_count)) # make test set test_target = tuple(make_candidate() for i in range(test_target_count)) # test_target can have duplicates. Actually we'd like to have more of # them. # %% implement choosing of bits to check # make tables to check with tables = make_tables(test_target) accepted_right, accepted_wrong, rejected_right, rejected_wrong, total = measure_tables(tables, test_target) print >> sys.stderr, "accepted_wrong", accepted_wrong print >> sys.stderr, "accepted_right", accepted_right print >> sys.stderr, "rejected_wrong", rejected_wrong print >> sys.stderr, "rejected_right", rejected_right, float(rejected_right) / total print >> sys.stderr, "total real", total