📄 mkkeywordhash.c
字号:
216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, 252,253,254,255};#define UpperToLower sqlite3UpperToLower/*** Comparision function for two Keyword records*/static int keywordCompare1(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; int n = pA->len - pB->len; if( n==0 ){ n = strcmp(pA->zName, pB->zName); } return n;}static int keywordCompare2(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; int n = pB->longestSuffix - pA->longestSuffix; if( n==0 ){ n = strcmp(pA->zName, pB->zName); } return n;}static int keywordCompare3(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; int n = pA->offset - pB->offset; return n;}/*** Return a KeywordTable entry with the given id*/static Keyword *findById(int id){ int i; for(i=0; i<nKeyword; i++){ if( aKeywordTable[i].id==id ) break; } return &aKeywordTable[i];}/*** This routine does the work. The generated code is printed on standard** output.*/int main(int argc, char **argv){ int i, j, k, h; int bestSize, bestCount; int count; int nChar; int totalLen = 0; int aHash[1000]; /* 1000 is much bigger than nKeyword */ /* Remove entries from the list of keywords that have mask==0 */ for(i=j=0; i<nKeyword; i++){ if( aKeywordTable[i].mask==0 ) continue; if( j<i ){ aKeywordTable[j] = aKeywordTable[i]; } j++; } nKeyword = j; /* Fill in the lengths of strings and hashes for all entries. */ for(i=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; p->len = strlen(p->zName); totalLen += p->len; p->hash = (UpperToLower[p->zName[0]]*4) ^ (UpperToLower[p->zName[p->len-1]]*3) ^ p->len; p->id = i+1; } /* Sort the table from shortest to longest keyword */ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1); /* Look for short keywords embedded in longer keywords */ for(i=nKeyword-2; i>=0; i--){ Keyword *p = &aKeywordTable[i]; for(j=nKeyword-1; j>i && p->substrId==0; j--){ Keyword *pOther = &aKeywordTable[j]; if( pOther->substrId ) continue; if( pOther->len<=p->len ) continue; for(k=0; k<=pOther->len-p->len; k++){ if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){ p->substrId = pOther->id; p->substrOffset = k; break; } } } } /* Compute the longestSuffix value for every word */ for(i=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; if( p->substrId ) continue; for(j=0; j<nKeyword; j++){ Keyword *pOther; if( j==i ) continue; pOther = &aKeywordTable[j]; if( pOther->substrId ) continue; for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){ if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ p->longestSuffix = k; } } } } /* Sort the table into reverse order by length */ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2); /* Fill in the offset for all entries */ nChar = 0; for(i=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; if( p->offset>0 || p->substrId ) continue; p->offset = nChar; nChar += p->len; for(k=p->len-1; k>=1; k--){ for(j=i+1; j<nKeyword; j++){ Keyword *pOther = &aKeywordTable[j]; if( pOther->offset>0 || pOther->substrId ) continue; if( pOther->len<=k ) continue; if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ p = pOther; p->offset = nChar - k; nChar = p->offset + p->len; p->zName += k; p->len -= k; p->prefix = k; j = i; k = p->len; } } } } for(i=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; if( p->substrId ){ p->offset = findById(p->substrId)->offset + p->substrOffset; } } /* Sort the table by offset */ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3); /* Figure out how big to make the hash table in order to minimize the ** number of collisions */ bestSize = nKeyword; bestCount = nKeyword*nKeyword; for(i=nKeyword/2; i<=2*nKeyword; i++){ for(j=0; j<i; j++) aHash[j] = 0; for(j=0; j<nKeyword; j++){ h = aKeywordTable[j].hash % i; aHash[h] *= 2; aHash[h]++; } for(j=count=0; j<i; j++) count += aHash[j]; if( count<bestCount ){ bestCount = count; bestSize = i; } } /* Compute the hash */ for(i=0; i<bestSize; i++) aHash[i] = 0; for(i=0; i<nKeyword; i++){ h = aKeywordTable[i].hash % bestSize; aKeywordTable[i].iNext = aHash[h]; aHash[h] = i+1; } /* Begin generating code */ printf("%s", zHdr); printf("/* Hash score: %d */\n", bestCount); printf("static int keywordCode(const char *z, int n){\n"); printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n", totalLen + nKeyword, nChar+1 ); printf(" static const char zText[%d] =\n", nChar+1); for(i=j=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; if( p->substrId ) continue; if( j==0 ) printf(" \""); printf("%s", p->zName); j += p->len; if( j>60 ){ printf("\"\n"); j = 0; } } printf("%s;\n", j>0 ? "\"" : " "); printf(" static const unsigned char aHash[%d] = {\n", bestSize); for(i=j=0; i<bestSize; i++){ if( j==0 ) printf(" "); printf(" %3d,", aHash[i]); j++; if( j>12 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" static const unsigned char aNext[%d] = {\n", nKeyword); for(i=j=0; i<nKeyword; i++){ if( j==0 ) printf(" "); printf(" %3d,", aKeywordTable[i].iNext); j++; if( j>12 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" static const unsigned char aLen[%d] = {\n", nKeyword); for(i=j=0; i<nKeyword; i++){ if( j==0 ) printf(" "); printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix); j++; if( j>12 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" static const unsigned short int aOffset[%d] = {\n", nKeyword); for(i=j=0; i<nKeyword; i++){ if( j==0 ) printf(" "); printf(" %3d,", aKeywordTable[i].offset); j++; if( j>12 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" static const unsigned char aCode[%d] = {\n", nKeyword); for(i=j=0; i<nKeyword; i++){ char *zToken = aKeywordTable[i].zTokenType; if( j==0 ) printf(" "); printf("%s,%*s", zToken, (int)(14-strlen(zToken)), ""); j++; if( j>=5 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" int h, i;\n"); printf(" if( n<2 ) return TK_ID;\n"); printf(" h = ((charMap(z[0])*4) ^\n" " (charMap(z[n-1])*3) ^\n" " n) %% %d;\n", bestSize); printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n"); printf(" if( aLen[i]==n &&" " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); printf(" return aCode[i];\n"); printf(" }\n"); printf(" }\n"); printf(" return TK_ID;\n"); printf("}\n"); printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n"); printf(" return keywordCode((char*)z, n);\n"); printf("}\n"); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -