📄 spell.c
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "postgres.h"#include "spell.h"#define MAX_NORM 1024#define MAXNORMLEN 256#define ERRSTRSIZE 1024#define STRNCASECMP(x,y) pg_strncasecmp(x, y, strlen(y))#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))static intcmpspell(const void *s1, const void *s2){ return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));}static intcmpspellaffix(const void *s1, const void *s2){ return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));}static voidstrlower(char *str){ unsigned char *ptr = (unsigned char *) str; while (*ptr) { *ptr = tolower(*ptr); ptr++; }}static char *strnduplicate(char *s, int len){ char *d = (char *) palloc(len + 1); memcpy(d, s, len); d[len] = '\0'; return d;}/* backward string compare for suffix tree operations */static intstrbcmp(const unsigned char *s1, const unsigned char *s2){ int l1 = strlen((const char *) s1) - 1, l2 = strlen((const char *) s2) - 1; while (l1 >= 0 && l2 >= 0) { if (s1[l1] < s2[l2]) return -1; if (s1[l1] > s2[l2]) return 1; l1--; l2--; } if (l1 < l2) return -1; if (l1 > l2) return 1; return 0;}static intstrbncmp(const unsigned char *s1, const unsigned char *s2, size_t count){ int l1 = strlen((const char *) s1) - 1, l2 = strlen((const char *) s2) - 1, l = count; while (l1 >= 0 && l2 >= 0 && l > 0) { if (s1[l1] < s2[l2]) return -1; if (s1[l1] > s2[l2]) return 1; l1--; l2--; l--; } if (l == 0) return 0; if (l1 < l2) return -1; if (l1 > l2) return 1; return 0;}static intcmpaffix(const void *s1, const void *s2){ const AFFIX *a1 = (const AFFIX *) s1; const AFFIX *a2 = (const AFFIX *) s2; if (a1->type < a2->type) return -1; if (a1->type > a2->type) return 1; if (a1->type == FF_PREFIX) return strcmp(a1->repl, a2->repl); else return strbcmp((const unsigned char *) a1->repl, (const unsigned char *) a2->repl);}intNIAddSpell(IspellDict * Conf, const char *word, const char *flag){ if (Conf->nspell >= Conf->mspell) { if (Conf->mspell) { Conf->mspell += 1024 * 20; Conf->Spell = (SPELL *) realloc(Conf->Spell, Conf->mspell * sizeof(SPELL)); } else { Conf->mspell = 1024 * 20; Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL)); } MEMOUT(Conf->Spell); } Conf->Spell[Conf->nspell].word = strdup(word); MEMOUT(Conf->Spell[Conf->nspell].word); strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16); Conf->nspell++; return (0);}intNIImportDictionary(IspellDict * Conf, const char *filename){ char str[BUFSIZ]; FILE *dict; if (!(dict = fopen(filename, "r"))) return (1); while (fgets(str, sizeof(str), dict)) { char *s; const char *flag; flag = NULL; if ((s = strchr(str, '/'))) { *s++ = '\0'; flag = s; while (*s) { if (isprint((unsigned char) *s) && !isspace((unsigned char) *s)) s++; else { *s = '\0'; break; } } } else flag = ""; strlower(str); /* Dont load words if first letter is not required */ /* It allows to optimize loading at search time */ s = str; while (*s) { if (*s == '\r' || *s == '\n') *s = '\0'; s++; } NIAddSpell(Conf, str, flag); } fclose(dict); return (0);}static intFindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly){ SPNode *node = Conf->Dictionary; SPNodeData *StopLow, *StopHigh, *StopMiddle; uint8 *ptr = (uint8 *) word; while (node && *ptr) { StopLow = node->data; StopHigh = node->data + node->length; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); if (StopMiddle->val == *ptr) { if (*(ptr + 1) == '\0' && StopMiddle->isword) { if (compoundonly && !StopMiddle->compoundallow) return 0; if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL)) return 1; } node = StopMiddle->node; ptr++; break; } else if (StopMiddle->val < *ptr) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } if (StopLow >= StopHigh) break; } return 0;}intNIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type){ if (Conf->naffixes >= Conf->maffixes) { if (Conf->maffixes) { Conf->maffixes += 16; Conf->Affix = (AFFIX *) realloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); } else { Conf->maffixes = 16; Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX)); } MEMOUT(Conf->Affix); } if (strcmp(mask, ".") == 0) { Conf->Affix[Conf->naffixes].issimple = 1; Conf->Affix[Conf->naffixes].isregis = 0; Conf->Affix[Conf->naffixes].mask = strdup(""); } else if (RS_isRegis(mask)) { Conf->Affix[Conf->naffixes].issimple = 0; Conf->Affix[Conf->naffixes].isregis = 1; Conf->Affix[Conf->naffixes].mask = strdup(mask); } else { Conf->Affix[Conf->naffixes].issimple = 0; Conf->Affix[Conf->naffixes].isregis = 0; Conf->Affix[Conf->naffixes].mask = (char *) malloc(strlen(mask) + 2); if (type == FF_SUFFIX) sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask); else sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask); } MEMOUT(Conf->Affix[Conf->naffixes].mask); Conf->Affix[Conf->naffixes].compile = 1; Conf->Affix[Conf->naffixes].flagflags = flagflags; Conf->Affix[Conf->naffixes].flag = flag; Conf->Affix[Conf->naffixes].type = type; Conf->Affix[Conf->naffixes].find = strdup(find); MEMOUT(Conf->Affix[Conf->naffixes].find); Conf->Affix[Conf->naffixes].repl = strdup(repl); MEMOUT(Conf->Affix[Conf->naffixes].repl); Conf->Affix[Conf->naffixes].replen = strlen(repl); Conf->naffixes++; return (0);}static char *remove_spaces(char *dist, char *src){ char *d, *s; d = dist; s = src; while (*s) { if (*s != ' ' && *s != '-' && *s != '\t') { *d = *s; d++; } s++; } *d = 0; return (dist);}intNIImportAffixes(IspellDict * Conf, const char *filename){ char str[BUFSIZ]; char mask[BUFSIZ]; char find[BUFSIZ]; char repl[BUFSIZ]; char *s; int i; int suffixes = 0; int prefixes = 0; int flag = 0; char flagflags = 0; FILE *affix; if (!(affix = fopen(filename, "r"))) return (1); Conf->compoundcontrol = '\t'; while (fgets(str, sizeof(str), affix)) { if (STRNCASECMP(str, "compoundwords") == 0) { s = strchr(str, 'l'); if (s) { while (*s != ' ') s++; while (*s == ' ') s++; Conf->compoundcontrol = *s; continue; } } if (STRNCASECMP(str, "suffixes") == 0) { suffixes = 1; prefixes = 0; continue; } if (STRNCASECMP(str, "prefixes") == 0) { suffixes = 0; prefixes = 1; continue; } if (STRNCASECMP(str, "flag ") == 0) { s = str + 5; flagflags = 0; while (*s == ' ') s++; if (*s == '*') { flagflags |= FF_CROSSPRODUCT; s++; } else if (*s == '~') { flagflags |= FF_COMPOUNDONLYAFX; s++; } if (*s == '\\') s++; flag = (unsigned char) *s; continue; } if ((!suffixes) && (!prefixes)) continue; if ((s = strchr(str, '#'))) *s = 0; if (!*str) continue; strlower(str); strcpy(mask, ""); strcpy(find, ""); strcpy(repl, ""); i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl); remove_spaces(str, repl); strcpy(repl, str); remove_spaces(str, find); strcpy(find, str); remove_spaces(str, mask); strcpy(mask, str); switch (i) { case 3: break; case 2: if (*find != '\0') { strcpy(repl, find); strcpy(find, ""); } break; default: continue; } NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX); } fclose(affix); return (0);}static intMergeAffix(IspellDict * Conf, int a1, int a2){ int naffix = 0; char **ptr = Conf->AffixData; while (*ptr) { naffix++; ptr++; } Conf->AffixData = (char **) realloc(Conf->AffixData, (naffix + 2) * sizeof(char *)); MEMOUT(Conf->AffixData); ptr = Conf->AffixData + naffix; *ptr = malloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ ); MEMOUT(ptr); sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]); ptr++; *ptr = '\0'; return naffix;}static SPNode *mkSPNode(IspellDict * Conf, int low, int high, int level){ int i; int nchar = 0; char lastchar = '\0'; SPNode *rs; SPNodeData *data; int lownew = low; for (i = low; i < high; i++) if (Conf->Spell[i].p.d.len > level && lastchar != Conf->Spell[i].word[level]) { nchar++; lastchar = Conf->Spell[i].word[level]; } if (!nchar) return NULL; rs = (SPNode *) malloc(SPNHRDSZ + nchar * sizeof(SPNodeData)); MEMOUT(rs); memset(rs, 0, SPNHRDSZ + nchar * sizeof(SPNodeData)); rs->length = nchar; data = rs->data; lastchar = '\0'; for (i = low; i < high; i++) if (Conf->Spell[i].p.d.len > level) { if (lastchar != Conf->Spell[i].word[level]) { if (lastchar) { data->node = mkSPNode(Conf, lownew, i, level + 1); lownew = i; data++; } lastchar = Conf->Spell[i].word[level]; } data->val = ((uint8 *) (Conf->Spell[i].word))[level]; if (Conf->Spell[i].p.d.len == level + 1) { if (data->isword && data->affix != Conf->Spell[i].p.d.affix) { /* * fprintf(stderr,"Word already exists: %s (affixes: '%s' * and '%s')\n", Conf->Spell[i].word, * Conf->AffixData[data->affix], * Conf->AffixData[Conf->Spell[i].p.d.affix] ); */ /* MergeAffix called a few times */ data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix); } else data->affix = Conf->Spell[i].p.d.affix; data->isword = 1; if (strchr(Conf->AffixData[data->affix], Conf->compoundcontrol)) data->compoundallow = 1; } } data->node = mkSPNode(Conf, lownew, high, level + 1); return rs;}voidNISortDictionary(IspellDict * Conf){ size_t i; int naffix = 3; /* compress affixes */ qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix); for (i = 1; i < Conf->nspell; i++) if (strcmp(Conf->Spell[i].p.flag, Conf->Spell[i - 1].p.flag)) naffix++; Conf->AffixData = (char **) malloc(naffix * sizeof(char *)); MEMOUT(Conf->AffixData); memset(Conf->AffixData, 0, naffix * sizeof(char *)); naffix = 1; Conf->AffixData[0] = strdup(""); MEMOUT(Conf->AffixData[0]); Conf->AffixData[1] = strdup(Conf->Spell[0].p.flag); MEMOUT(Conf->AffixData[1]); Conf->Spell[0].p.d.affix = 1; Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word); for (i = 1; i < Conf->nspell; i++) { if (strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix])) { naffix++; Conf->AffixData[naffix] = strdup(Conf->Spell[i].p.flag); MEMOUT(Conf->AffixData[naffix]); } Conf->Spell[i].p.d.affix = naffix; Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word); } qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell); Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); for (i = 0; i < Conf->nspell; i++) free(Conf->Spell[i].word); free(Conf->Spell); Conf->Spell = NULL;}static AffixNode *mkANode(IspellDict * Conf, int low, int high, int level, int type){ int i; int nchar = 0; uint8 lastchar = '\0'; AffixNode *rs; AffixNodeData *data; int lownew = low; for (i = low; i < high; i++) if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) { nchar++; lastchar = GETCHAR(Conf->Affix + i, level, type); } if (!nchar) return NULL; rs = (AffixNode *) malloc(ANHRDSZ + nchar * sizeof(AffixNodeData)); MEMOUT(rs); memset(rs, 0, ANHRDSZ + nchar * sizeof(AffixNodeData)); rs->length = nchar; data = rs->data; lastchar = '\0'; for (i = low; i < high; i++) if (Conf->Affix[i].replen > level) { if (lastchar != GETCHAR(Conf->Affix + i, level, type)) { if (lastchar) { data->node = mkANode(Conf, lownew, i, level + 1, type); lownew = i; data++; } lastchar = GETCHAR(Conf->Affix + i, level, type); } data->val = GETCHAR(Conf->Affix + i, level, type); if (Conf->Affix[i].replen == level + 1) { /* affix stopped */ if (!data->naff) { data->aff = (AFFIX **) malloc(sizeof(AFFIX *) * (high - i + 1)); MEMOUT(data->aff); } data->aff[data->naff] = Conf->Affix + i; data->naff++; } } data->node = mkANode(Conf, lownew, high, level + 1, type); return rs;}static voidmkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix){ int i, cnt = 0; int start = (issuffix) ? startsuffix : 0; int end = (issuffix) ? Conf->naffixes : startsuffix; AffixNode *Affix = (AffixNode *) malloc(ANHRDSZ + sizeof(AffixNodeData)); MEMOUT(Affix); memset(Affix, 0, ANHRDSZ + sizeof(AffixNodeData)); Affix->length = 1; Affix->isvoid = 1; if (issuffix) { Affix->data->node = Conf->Suffix; Conf->Suffix = Affix; } else { Affix->data->node = Conf->Prefix; Conf->Prefix = Affix; } for (i = start; i < end; i++) if (Conf->Affix[i].replen == 0) cnt++; if (cnt == 0) return;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -