📄 hashgen.html
字号:
Up:<a rel=up href="#Example">Example</a><br><h3>Generated Test Source</h3><br><pre>// Test for hashed lookup code generated by hashgen 1.0b.// Measures the average time of both successful and unsuccessful lookups.#include "cxx_keywords.h"#include <ctime>#include <iostream>#include <iomanip>using namespace std;using namespace Scanner;#ifndef CLK_TCK#define CLK_TCK CLOCKS_PER_SEC#endifint main() { clock_t tick; double time; unsigned count; unsigned i; // Hit test. If/goto overhead is negligible, so it's okay to test for // both correctness and speed at the same time. There is no sense in // checking the key pointed to by the lookup function's return value // after a successful search, since the only place where a non-null // pointer is returned is right after successful strcmp() or memcmp(). count = 0; tick = clock(); do { for (i = 0; i < 21276; i++) { if (TokenHash::lookup("asm", 3) == 0) goto err_miss; if (TokenHash::lookup("auto", 4) == 0) goto err_miss; if (TokenHash::lookup("break", 5) == 0) goto err_miss; if (TokenHash::lookup("case", 4) == 0) goto err_miss; if (TokenHash::lookup("catch", 5) == 0) goto err_miss; if (TokenHash::lookup("char", 4) == 0) goto err_miss; if (TokenHash::lookup("class", 5) == 0) goto err_miss; if (TokenHash::lookup("const", 5) == 0) goto err_miss; if (TokenHash::lookup("continue", 8) == 0) goto err_miss; if (TokenHash::lookup("default", 7) == 0) goto err_miss; if (TokenHash::lookup("delete", 6) == 0) goto err_miss; if (TokenHash::lookup("do", 2) == 0) goto err_miss; if (TokenHash::lookup("double", 6) == 0) goto err_miss; if (TokenHash::lookup("else", 4) == 0) goto err_miss; if (TokenHash::lookup("enum", 4) == 0) goto err_miss; if (TokenHash::lookup("extern", 6) == 0) goto err_miss; if (TokenHash::lookup("float", 5) == 0) goto err_miss; if (TokenHash::lookup("for", 3) == 0) goto err_miss; if (TokenHash::lookup("friend", 6) == 0) goto err_miss; if (TokenHash::lookup("goto", 4) == 0) goto err_miss; if (TokenHash::lookup("if", 2) == 0) goto err_miss; if (TokenHash::lookup("inline", 6) == 0) goto err_miss; if (TokenHash::lookup("int", 3) == 0) goto err_miss; if (TokenHash::lookup("long", 4) == 0) goto err_miss; if (TokenHash::lookup("new", 3) == 0) goto err_miss; if (TokenHash::lookup("operator", 8) == 0) goto err_miss; if (TokenHash::lookup("overload", 8) == 0) goto err_miss; if (TokenHash::lookup("private", 7) == 0) goto err_miss; if (TokenHash::lookup("protected", 9) == 0) goto err_miss; if (TokenHash::lookup("public", 6) == 0) goto err_miss; if (TokenHash::lookup("register", 8) == 0) goto err_miss; if (TokenHash::lookup("return", 6) == 0) goto err_miss; if (TokenHash::lookup("short", 5) == 0) goto err_miss; if (TokenHash::lookup("signed", 6) == 0) goto err_miss; if (TokenHash::lookup("sizeof", 6) == 0) goto err_miss; if (TokenHash::lookup("static", 6) == 0) goto err_miss; if (TokenHash::lookup("struct", 6) == 0) goto err_miss; if (TokenHash::lookup("switch", 6) == 0) goto err_miss; if (TokenHash::lookup("template", 8) == 0) goto err_miss; if (TokenHash::lookup("this", 4) == 0) goto err_miss; if (TokenHash::lookup("typedef", 7) == 0) goto err_miss; if (TokenHash::lookup("union", 5) == 0) goto err_miss; if (TokenHash::lookup("unsigned", 8) == 0) goto err_miss; if (TokenHash::lookup("virtual", 7) == 0) goto err_miss; if (TokenHash::lookup("void", 4) == 0) goto err_miss; if (TokenHash::lookup("volatile", 8) == 0) goto err_miss; if (TokenHash::lookup("while", 5) == 0) goto err_miss; } count++; time = (clock() - tick) / (double)CLK_TCK; } while ((time < 0.5) && (count < 1000000)); cout << "successful search time " << setw(6) << int(time * 1e9 / 21276 / 47 / count) << "ns (" << setw(0) << int(999972 * count / time) << " searches/sec)\n"; // Miss test. Characters of the test keys are random within range // from min-expected-char to max-expected-char, their length from 0 // to a bit more than maximum length of the existing keys. // The results should not be taken too seriously, the keys are too // random, so strcmp() or memcmp() is not called most of the time // because the calculated hash values are out of range. count = 0; tick = clock(); do { for (i = 0; i < 21276; i++) { if (TokenHash::lookup("<T", 2)) goto err_hit; if (TokenHash::lookup("hPHv]", 5)) goto err_hit; if (TokenHash::lookup("UlkW", 4)) goto err_hit; if (TokenHash::lookup("", 0)) goto err_hit; if (TokenHash::lookup("h16QrP@", 7)) goto err_hit; if (TokenHash::lookup("=Wk", 3)) goto err_hit; if (TokenHash::lookup("`JN4?", 5)) goto err_hit; if (TokenHash::lookup("0", 1)) goto err_hit; if (TokenHash::lookup("d1[GsilcF5", 10)) goto err_hit; if (TokenHash::lookup("3jx>", 4)) goto err_hit; if (TokenHash::lookup("77EDG5i", 7)) goto err_hit; if (TokenHash::lookup("O<1GP", 5)) goto err_hit; if (TokenHash::lookup("w9P@9H", 6)) goto err_hit; if (TokenHash::lookup("v0Edr", 5)) goto err_hit; if (TokenHash::lookup("", 0)) goto err_hit; if (TokenHash::lookup("", 0)) goto err_hit; if (TokenHash::lookup("??wFTtGZb", 9)) goto err_hit; if (TokenHash::lookup("bWycwd_jmp", 10)) goto err_hit; if (TokenHash::lookup(":", 1)) goto err_hit; if (TokenHash::lookup("@:mteniZg", 9)) goto err_hit; if (TokenHash::lookup("?fRm2e=", 7)) goto err_hit; if (TokenHash::lookup("6", 1)) goto err_hit; if (TokenHash::lookup("Ei", 2)) goto err_hit; if (TokenHash::lookup("b6mT_8G<H", 9)) goto err_hit; if (TokenHash::lookup("yB", 2)) goto err_hit; if (TokenHash::lookup("Vd", 2)) goto err_hit; if (TokenHash::lookup("BbGxnm0Xdz", 10)) goto err_hit; if (TokenHash::lookup("7x5n_pJl", 8)) goto err_hit; if (TokenHash::lookup("R8JjBHe7", 8)) goto err_hit; if (TokenHash::lookup("O?", 2)) goto err_hit; if (TokenHash::lookup("6?gt", 4)) goto err_hit; if (TokenHash::lookup("", 0)) goto err_hit; if (TokenHash::lookup(";O8R", 4)) goto err_hit; if (TokenHash::lookup("6X2e6L@ZW", 9)) goto err_hit; if (TokenHash::lookup("tGDve", 5)) goto err_hit; if (TokenHash::lookup("R9CutRJ", 7)) goto err_hit; if (TokenHash::lookup("=SaE[9TaJ@", 10)) goto err_hit; if (TokenHash::lookup("", 0)) goto err_hit; if (TokenHash::lookup("EEcmFFmCu", 9)) goto err_hit; if (TokenHash::lookup("", 0)) goto err_hit; if (TokenHash::lookup("5", 1)) goto err_hit; if (TokenHash::lookup("Z", 1)) goto err_hit; if (TokenHash::lookup("CfI", 3)) goto err_hit; if (TokenHash::lookup("\\lP", 3)) goto err_hit; if (TokenHash::lookup("MYKg\\Dl1Z6", 10)) goto err_hit; if (TokenHash::lookup("L6crI]N", 7)) goto err_hit; if (TokenHash::lookup("LQtKd", 5)) goto err_hit; } count++; time = (clock() - tick) / (double)CLK_TCK; } while ((time < 0.5) && (count < 1000000)); cout << "search failure time " << setw(6) << int(time * 1e9 / 21276 / 47 / count) << "ns (" << setw(0) << int(999972 * count / time) << " searches/sec)\n"; return 0;err_hit: cerr << "cxx_keywords.cxx: non-existing key found\n"; exit(1);err_miss: cerr << "cxx_keywords.cxx: key not found\n"; exit(1);}</pre><p><hr>Node:<a name="Machine%20Code">Machine Code</a>,Next:<a rel=next href="#Tips">Tips</a>,Previous:<a rel=previous href="#Generated%20Test%20Source">Generated Test Source</a>,Up:<a rel=up href="#Example">Example</a><br><h3>Generated Machine Code</h3><p>Bellow is a frament of the assembly code generated by GCC 2.95.3(i386, FreeBSD 4.4) for the following line of the test source:<br><pre> if (TokenHash::lookup("catch", 5) == 0) goto err_miss;</pre><p>The lookup function is inlined and called with constant arguments. GCC did not generate the code to check length of the key at all sinceit's known at compile time. <code>.LC2</code> contains "catch", it'slong enough to include all 3 hash positions, for shorter stringsthe compliler generated access only to included positions (linesmarked with "*").<br><pre> movzbl .LC2+3,%eax movzbl _Q27Scanner9TokenHash$charWeights-48(%eax),%edx * movzbl .LC2+1,%eax movzbl _Q27Scanner9TokenHash$charWeights-48(%eax),%eax * leal 5(%eax,%edx),%edx movzbl .LC2,%eax movzbl _Q27Scanner9TokenHash$charWeights-48(%eax),%eax * addl %eax,%edx cmpl $67,%edx ja .L540 movzbl _Q27Scanner9TokenHash$hashTable(%edx),%edx cmpl $47,%edx je .L540 leal 0(,%edx,8),%eax leal _Q27Scanner9TokenHash$tokens(%eax),%ebx movl _Q27Scanner9TokenHash$tokens(%eax),%edx movb (%edx),%al cmpb %al,.LC2 jne .L577 addl $-8,%esp pushl %edx pushl $.LC2 call strcmp movl %eax,%edx addl $16,%esp movl %ebx,%eax testl %edx,%edx je .L567.L577: xorl %eax,%eax.L567: testl %eax,%eax je .L540</pre><p>If the lookup function is not inline and the invocation context isnot known, a jump table is generated for the switch on the key's length(<code>jmp *.L21(,%edx,4)</code>):<br><pre>lookup__Q27Scanner9TokenHashPCcUi: pushl %ebp movl %esp,%ebp subl $20,%esp pushl %ebx movl 8(%ebp),%ecx movl 12(%ebp),%edx cmpl $9,%edx ja .L24 jmp *.L21(,%edx,4) .p2align 2,0x90 .section .rodata .p2align 2 .p2align 2.L21: .long .L9 .long .L19 .long .L18 .long .L18 .long .L16 .long .L16 .long .L16 .long .L16 .long .L16 .long .L16.text .p2align 2,0x90.L16: movzbl 3(%ecx),%eax movzbl _Q27Scanner9TokenHash$charWeights-48(%eax),%eax addl %eax,%edx.L18: movzbl 1(%ecx),%eax movzbl _Q27Scanner9TokenHash$charWeights-48(%eax),%eax addl %eax,%edx.L19: movzbl (%ecx),%eax movzbl _Q27Scanner9TokenHash$charWeights-48(%eax),%eax addl %eax,%edx.L9: cmpl $67,%edx ja .L24 movzbl _Q27Scanner9TokenHash$hashTable(%edx),%edx cmpl $47,%edx je .L24 leal 0(,%edx,8),%eax leal _Q27Scanner9TokenHash$tokens(%eax),%ebx movl _Q27Scanner9TokenHash$tokens(%eax),%edx movb (%edx),%al cmpb %al,(%ecx) jne .L24 addl $-8,%esp pushl %edx pushl %ecx call strcmp testl %eax,%eax jne .L24 movl %ebx,%eax jmp .L26 .p2align 2,0x90.L24: xorl %eax,%eax.L26: movl -24(%ebp),%ebx leave ret</pre><p><hr>Node:<a name="Tips">Tips</a>,Next:<a rel=next href="#Bugs">Bugs</a>,Previous:<a rel=previous href="#Machine%20Code">Machine Code</a>,Up:<a rel=up href="#Top">Top</a><br><h2>Tips</h2><p><hr>Node:<a name="Bugs">Bugs</a>,Next:<a rel=next href="#Acknowledgements">Acknowledgements</a>,Previous:<a rel=previous href="#Tips">Tips</a>,Up:<a rel=up href="#Top">Top</a><br><h2>Known Bugs and Limitations of <code>hashgen</code></h2><p>The keys may be of any reasonable length, but only the first 32 charactersare used for hashing, they must be unique, otherwise the generated hashfunction will not be perfect.<p><code>hashgen</code> consumes lots of RAM when the keyset is large (~20Mbfor 3,000 keys, for larger keysets it switches to a slower and lessmemory-hungry mode (~4Mb for 10,000 keys)). This estimation is very inaccurate, the actual figures depend on theaverage length of a key and its assotiated data, character code range,etc.<p>There are many misspellings, misplaced definite articles, andother mistakes and inaccuracies in this manual. I would be gratefulto anyone willing to help me to clean this mess up.<p><hr>Node:<a name="Acknowledgements">Acknowledgements</a>,Previous:<a rel=previous href="#Bugs">Bugs</a>,Up:<a rel=up href="#Top">Top</a><br><h2>Acknowledgements</h2><p>The <code>hashgen</code> utility was written by Vladimir Shiryaev.<p>The program does basically the same thing and uses the samebasic algorithm (with a few improvements) as the <code>gperf</code>utility by Douglas C. Schmidt. The differencies in functionality are briefly outlined inthe Overview Chapter (<a href="#Overview">Overview</a>).<p>As Doug says in the <code>gperf</code> manual, the general idea for theperfect hash function generator was inspired by Keith Bostic'salgorithm, created at the University of California, Irvine.</body></html>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -