📄 checksum_test.cc
字号:
/* Copyright (C) 2007 Josh MacDonald */extern "C" {#include "test.h"}#include <list>#include <vector>#include <map>#include <algorithm>using std::list;using std::map;using std::vector;// MLCG parameters// a, a*uint32_t good_32bit_values[] = { 1597334677U, // ... 741103597U, 887987685U,};// a, a*uint64_t good_64bit_values[] = { 1181783497276652981ULL, 4292484099903637661ULL, 7664345821815920749ULL, // ...};struct true_type { };struct false_type { };template <typename Word>int bitsof();template<>int bitsof<uint32_t>() { return 32;}template<>int bitsof<uint64_t>() { return 64;}struct plain { int operator()(const uint8_t &c) { return c; }};template <typename Word>struct hhash { // take "h" of the high-bits as a hash value for this // checksum, which are the most "distant" in terms of the // spectral test for the rabin_karp MLCG. For short windows, // the high bits aren't enough, XOR "mask" worth of these in. Word operator()(const Word& t, const int &h, const int &mask) { return (t >> h) ^ (t & mask); }};template <typename Word>Word good_word();template<>uint32_t good_word<uint32_t>() { return good_32bit_values[0];}template<>uint64_t good_word<uint64_t>() { return good_64bit_values[0];}// CLASSES#define SELF Word, CksumSize, CksumSkip, Permute, Hash, Compaction#define MEMBER template <typename Word, \ int CksumSize, \ int CksumSkip, \ typename Permute, \ typename Hash, \ int Compaction>MEMBERstruct cksum_params { typedef Word word_type; typedef Permute permute_type; typedef Hash hash_type; enum { cksum_size = CksumSize, cksum_skip = CksumSkip, compaction = Compaction, };};MEMBERstruct rabin_karp { typedef Word word_type; typedef Permute permute_type; typedef Hash hash_type; enum { cksum_size = CksumSize, cksum_skip = CksumSkip, compaction = Compaction, }; // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ... rabin_karp() { multiplier = good_word<Word>(); powers = new Word[cksum_size]; powers[cksum_size - 1] = 1; for (int i = cksum_size - 2; i >= 0; i--) { powers[i] = powers[i + 1] * multiplier; } product = powers[0] * multiplier; } ~rabin_karp() { delete [] powers; } Word step(const uint8_t *ptr) { Word h = 0; for (int i = 0; i < cksum_size; i++) { h += permute_type()(ptr[i]) * powers[i]; } return h; } Word state0(const uint8_t *ptr) { incr_state = step(ptr); return incr_state; } Word incr(const uint8_t *ptr) { incr_state = multiplier * incr_state - product * permute_type()(ptr[-1]) + permute_type()(ptr[cksum_size - 1]); return incr_state; } Word *powers; Word product; Word multiplier; Word incr_state;};MEMBERstruct adler32_cksum { typedef Word word_type; typedef Permute permute_type; typedef Hash hash_type; enum { cksum_size = CksumSize, cksum_skip = CksumSkip, compaction = Compaction, }; Word step(const uint8_t *ptr) { return xd3_lcksum (ptr, cksum_size); } Word state0(const uint8_t *ptr) { incr_state = step(ptr); return incr_state; } Word incr(const uint8_t *ptr) { incr_state = xd3_large_cksum_update (incr_state, ptr - 1, cksum_size); return incr_state; } Word incr_state;};// TESTStemplate <typename Word>struct file_stats { typedef list<const uint8_t*> ptr_list; typedef Word word_type; typedef map<word_type, ptr_list> table_type; typedef typename table_type::iterator table_iterator; typedef typename ptr_list::iterator ptr_iterator; int cksum_size; int cksum_skip; int unique; int unique_values; int count; table_type table; file_stats(int size, int skip) : cksum_size(size), cksum_skip(skip), unique(0), unique_values(0), count(0) { } void reset() { unique = 0; unique_values = 0; count = 0; table.clear(); } void update(const word_type &word, const uint8_t *ptr) { table_iterator t_i = table.find(word); count++; if (t_i == table.end()) { table.insert(make_pair(word, ptr_list())); } ptr_list &pl = table[word]; for (ptr_iterator p_i = pl.begin(); p_i != pl.end(); ++p_i) { if (memcmp(*p_i, ptr, cksum_size) == 0) { return; } } unique++; pl.push_back(ptr); } void freeze() { unique_values = table.size(); table.clear(); }};struct test_result_base;static vector<test_result_base*> all_tests;struct test_result_base { virtual ~test_result_base() { } virtual void reset() = 0; virtual void print() = 0; virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0; virtual void stat() = 0; virtual int count() = 0; virtual int dups() = 0; virtual double uniqueness() = 0; virtual double fullness() = 0; virtual double collisions() = 0; virtual double coverage() = 0; virtual double compression() = 0; virtual double time() = 0; virtual double score() = 0; virtual void set_score(double min_dups_frac, double min_time) = 0; virtual double total_time() = 0; virtual int total_count() = 0; virtual int total_dups() = 0;};struct compare_h { bool operator()(test_result_base *a, test_result_base *b) { return a->score() < b->score(); }};MEMBERstruct test_result : public test_result_base { typedef Word word_type; typedef Permute permute_type; typedef Hash hash_type; enum { cksum_size = CksumSize, cksum_skip = CksumSkip, compaction = Compaction, }; const char *test_name; file_stats<Word> fstats; int test_size; int n_steps; int n_incrs; int s_bits; int s_mask; int t_entries; int h_bits; int h_buckets_full; double h_score; char *hash_table; long accum_millis; int accum_iters; // These are not reset double accum_time; int accum_count; int accum_dups; int accum_colls; int accum_size; test_result(const char *name) : test_name(name), fstats(cksum_size, cksum_skip), hash_table(NULL), accum_millis(0), accum_iters(0), accum_time(0.0), accum_count(0), accum_dups(0), accum_colls(0), accum_size(0) { all_tests.push_back(this); } ~test_result() { reset(); } void reset() { // size of file test_size = -1; // count n_steps = -1; n_incrs = -1; // four values used by new_table()/summarize_table() s_bits = -1; s_mask = -1; t_entries = -1; h_bits = -1; h_buckets_full = -1; accum_millis = 0; accum_iters = 0; fstats.reset(); // temporary if (hash_table) { delete(hash_table); hash_table = NULL; } } int count() { if (cksum_skip == 1) { return n_incrs; } else { return n_steps; } } int dups() { return fstats.count - fstats.unique; } int colls() { return fstats.unique - fstats.unique_values; } double uniqueness() { return 1.0 - (double) dups() / count();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -