📄 checksum_test.cc
字号:
} double fullness() { return (double) h_buckets_full / (1 << h_bits); } double collisions() { return (double) colls() / fstats.unique; } double coverage() { return (double) h_buckets_full / uniqueness() / count(); } double compression() { return 1.0 - coverage(); } double time() { return (double) accum_millis / accum_iters; } double score() { return h_score; } void set_score(double min_compression, double min_time) { h_score = (compression() - 0.99 * min_compression) * (time() - 0.99 * min_time); } double total_time() { return accum_time; } int total_count() { return accum_count; } int total_dups() { return accum_dups; } int total_colls() { return accum_dups; } void stat() { accum_time += time(); accum_count += count(); accum_dups += dups(); accum_colls += colls(); accum_size += test_size; } void print() { if (fstats.count != count()) { fprintf(stderr, "internal error: %d != %d\n", fstats.count, count()); abort(); } printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n", test_name, cksum_size, cksum_skip, count(), 100.0 * uniqueness(), h_buckets_full, 100.0 * fullness(), 100.0 * collisions(), 100.0 * coverage(), h_bits, 0.001 * accum_iters * test_size / accum_millis, accum_iters); } int size_log2 (int slots) { int bits = bitsof<word_type>() - 1; int i; for (i = 3; i <= bits; i += 1) { if (slots <= (1 << i)) { return i - compaction; } } return bits; } void new_table(int entries) { t_entries = entries; h_bits = size_log2(entries); int n = 1 << h_bits; s_bits = bitsof<word_type>() - h_bits; s_mask = n - 1; hash_table = new char[n / 8]; memset(hash_table, 0, n / 8); } int get_table_bit(int i) { return hash_table[i/8] & (1 << i%8); } int set_table_bit(int i) { return hash_table[i/8] |= (1 << i%8); } void summarize_table() { int n = 1 << h_bits; int f = 0; for (int i = 0; i < n; i++) { if (get_table_bit(i)) { f++; } } h_buckets_full = f; } void get(const uint8_t* buf, const int buf_size, int test_iters) { rabin_karp<SELF> test; //adler32_cksum<SELF> test; hash_type hash; const uint8_t *ptr; const uint8_t *end; int last_offset; int periods; int stop; test_size = buf_size; last_offset = buf_size - cksum_size; if (last_offset < 0) { periods = 0; n_steps = 0; n_incrs = 0; stop = -cksum_size; } else { periods = last_offset / cksum_skip; n_steps = periods + 1; n_incrs = last_offset + 1; stop = last_offset - (periods + 1) * cksum_skip; } // Compute file stats once. if (fstats.unique_values == 0) { if (cksum_skip == 1) { for (int i = 0; i <= buf_size - cksum_size; i++) { fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i); } } else { ptr = buf + last_offset; end = buf + stop; for (; ptr != end; ptr -= cksum_skip) { fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr); } } fstats.freeze(); } long start_test = get_millisecs_now(); if (cksum_skip != 1) { new_table(n_steps); for (int i = 0; i < test_iters; i++) { ptr = buf + last_offset; end = buf + stop; for (; ptr != end; ptr -= cksum_skip) { set_table_bit(hash(test.step(ptr), s_bits, s_mask)); } } summarize_table(); } stop = buf_size - cksum_size + 1; if (stop < 0) { stop = 0; } if (cksum_skip == 1) { new_table(n_incrs); for (int i = 0; i < test_iters; i++) { ptr = buf; end = buf + stop; if (ptr != end) { set_table_bit(hash(test.state0(ptr++), s_bits, s_mask)); } for (; ptr != end; ptr++) { Word w = test.incr(ptr); assert(w == test.step(ptr)); set_table_bit(hash(w, s_bits, s_mask)); } } summarize_table(); } accum_iters += test_iters; accum_millis += get_millisecs_now() - start_test; }};template <typename Word>void print_array(const char *tname) { printf("static const %s hash_multiplier[64] = {\n", tname); Word p = 1; for (int i = 0; i < 64; i++) { printf(" %uU,\n", p); p *= good_word<Word>(); } printf("};\n", tname);}int main(int argc, char** argv) { int i; uint8_t *buf = NULL; size_t buf_len = 0; int ret; if (argc <= 1) { fprintf(stderr, "usage: %s file ...\n", argv[0]); return 1; } //print_array<uint32_t>("uint32_t");#define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \ _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \ (#T "_" #Z "_" #S "_" #P "_" #H "_" #C)#if 0 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \ TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \#endif#define TESTS(SKIP) \ TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \ TEST(uint32_t, 9, SKIP, plain, hhash, 3) #define TESTS_ALL(SKIP) \ TEST(uint32_t, 3, SKIP, plain, hhash, 0); \ TEST(uint32_t, 3, SKIP, plain, hhash, 1); \ TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \ TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \ TEST(uint32_t, 5, SKIP, plain, hhash, 0); \ TEST(uint32_t, 5, SKIP, plain, hhash, 1); \ TEST(uint32_t, 8, SKIP, plain, hhash, 0); \ TEST(uint32_t, 8, SKIP, plain, hhash, 1); \ TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \ TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \ TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 13, SKIP, plain, hhash, 0); \ TEST(uint32_t, 13, SKIP, plain, hhash, 1); \ TEST(uint32_t, 15, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 15, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 16, SKIP, plain, hhash, 0); /* x */ \ TEST(uint32_t, 16, SKIP, plain, hhash, 1); /* x */ \ TEST(uint32_t, 21, SKIP, plain, hhash, 0); \ TEST(uint32_t, 21, SKIP, plain, hhash, 1); \ TEST(uint32_t, 34, SKIP, plain, hhash, 0); \ TEST(uint32_t, 34, SKIP, plain, hhash, 1); \ TEST(uint32_t, 55, SKIP, plain, hhash, 0); \ TEST(uint32_t, 55, SKIP, plain, hhash, 1) TESTS(1); // *// TESTS(2); // *// TESTS(3); // *// TESTS(5); // *// TESTS(8); // *// TESTS(9);// TESTS(11);// TESTS(13); // * TESTS(15);// TESTS(16);// TESTS(21); // *// TESTS(34); // *// TESTS(55); // *// TESTS(89); // * for (i = 1; i < argc; i++) { if ((ret = read_whole_file(argv[i], & buf, & buf_len))) { return 1; } fprintf(stderr, "file %s is %zu bytes\n", argv[i], buf_len); double min_time = -1.0; double min_compression = 0.0; for (vector<test_result_base*>::iterator i = all_tests.begin(); i != all_tests.end(); ++i) { test_result_base *test = *i; test->reset(); int iters = 100; long start_test = get_millisecs_now(); do { test->get(buf, buf_len, iters); iters *= 3; iters /= 2; } while (get_millisecs_now() - start_test < 2000); test->stat(); if (min_time < 0.0) { min_compression = test->compression(); min_time = test->time(); } if (min_time > test->time()) { min_time = test->time(); } if (min_compression > test->compression()) { min_compression = test->compression(); } test->print(); }// for (vector<test_result_base*>::iterator i = all_tests.begin();// i != all_tests.end(); ++i) {// test_result_base *test = *i;// test->set_score(min_compression, min_time);// } // sort(all_tests.begin(), all_tests.end(), compare_h()); // for (vector<test_result_base*>::iterator i = all_tests.begin();// i != all_tests.end(); ++i) {// test_result_base *test = *i;// test->print();// } free(buf); buf = NULL; } return 0; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -