📄 spell.c
字号:
Affix->data->aff = (AFFIX **) malloc(sizeof(AFFIX *) * cnt); MEMOUT(Affix->data->aff); Affix->data->naff = (uint32) cnt; cnt = 0; for (i = start; i < end; i++) if (Conf->Affix[i].replen == 0) { Affix->data->aff[cnt] = Conf->Affix + i; cnt++; }}voidNISortAffixes(IspellDict * Conf){ AFFIX *Affix; size_t i; CMPDAffix *ptr; int firstsuffix = -1; if (Conf->naffixes > 1) qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); Conf->CompoundAffix = ptr = (CMPDAffix *) malloc(sizeof(CMPDAffix) * Conf->naffixes); MEMOUT(Conf->CompoundAffix); ptr->affix = NULL; for (i = 0; i < Conf->naffixes; i++) { Affix = &(((AFFIX *) Conf->Affix)[i]); if (Affix->type == FF_SUFFIX) { if (firstsuffix < 0) firstsuffix = i; if (Affix->flagflags & FF_COMPOUNDONLYAFX) { if (!ptr->affix || strbncmp((const unsigned char *) (ptr - 1)->affix, (const unsigned char *) Affix->repl, (ptr - 1)->len)) { /* leave only unique and minimals suffixes */ ptr->affix = Affix->repl; ptr->len = Affix->replen; ptr++; } } } } ptr->affix = NULL; Conf->CompoundAffix = (CMPDAffix *) realloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); mkVoidAffix(Conf, 1, firstsuffix); mkVoidAffix(Conf, 0, firstsuffix);}static AffixNodeData *FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type){ AffixNodeData *StopLow, *StopHigh, *StopMiddle; uint8 symbol; if (node->isvoid) { /* search void affixes */ if (node->data->naff) return node->data; node = node->data->node; } while (node && *level < wrdlen) { StopLow = node->data; StopHigh = node->data + node->length; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); symbol = GETWCHAR(word, wrdlen, *level, type); if (StopMiddle->val == symbol) { (*level)++; if (StopMiddle->naff) return StopMiddle; node = StopMiddle->node; break; } else if (StopMiddle->val < symbol) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } if (StopLow >= StopHigh) break; } return NULL;}static char *CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword){ if (flagflags & FF_COMPOUNDONLYAFX) { if ((Affix->flagflags & FF_COMPOUNDONLYAFX) == 0) return NULL; } else { if (Affix->flagflags & FF_COMPOUNDONLYAFX) return NULL; } if (Affix->type == FF_SUFFIX) { strcpy(newword, word); strcpy(newword + len - Affix->replen, Affix->find); } else { strcpy(newword, Affix->find); strcat(newword, word + Affix->replen); } if (Affix->issimple) return newword; else if (Affix->isregis) { if (Affix->compile) { RS_compile(&(Affix->reg.regis), (Affix->type == FF_SUFFIX) ? 1 : 0, Affix->mask); Affix->compile = 0; } if (RS_execute(&(Affix->reg.regis), newword, -1)) return newword; } else { regmatch_t subs[2]; /* workaround for apache&linux */ int err; pg_wchar *data; size_t data_len; int dat_len; if (Affix->compile) { int wmasklen, masklen = strlen(Affix->mask); pg_wchar *mask; mask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar)); wmasklen = pg_mb2wchar_with_len(Affix->mask, mask, masklen); err = pg_regcomp(&(Affix->reg.regex), mask, wmasklen, REG_EXTENDED | REG_ICASE | REG_NOSUB); pfree(mask); if (err) { char regerrstr[ERRSTRSIZE]; pg_regerror(err, &(Affix->reg.regex), regerrstr, ERRSTRSIZE); elog(ERROR, "Regex error in '%s': %s", Affix->mask, regerrstr); } Affix->compile = 0; } /* Convert data string to wide characters */ dat_len = strlen(newword); data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); data_len = pg_mb2wchar_with_len(newword, data, dat_len); if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 1, subs, 0))) { pfree(data); return newword; } pfree(data); } return NULL;}static char **NormalizeSubWord(IspellDict * Conf, char *word, char flag){ AffixNodeData *suffix = NULL, *prefix = NULL; int slevel = 0, plevel = 0; int wrdlen = strlen(word), swrdlen; char **forms; char **cur; char newword[2 * MAXNORMLEN] = ""; char pnewword[2 * MAXNORMLEN] = ""; AffixNode *snode = Conf->Suffix, *pnode; int i, j; if (wrdlen > MAXNORMLEN) return NULL; strlower(word); cur = forms = (char **) palloc(MAX_NORM * sizeof(char *)); *cur = NULL; /* Check that the word itself is normal form */ if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) { *cur = pstrdup(word); cur++; *cur = NULL; } /* Find all other NORMAL forms of the 'word' (check only prefix) */ pnode = Conf->Prefix; plevel = 0; while (pnode) { prefix = FinfAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; for (j = 0; j < prefix->naff; j++) { if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword)) { /* prefix success */ if (FindWord(Conf, newword, prefix->aff[j]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1)) { /* word search success */ *cur = pstrdup(newword); cur++; *cur = NULL; } } } pnode = prefix->node; } /* * Find all other NORMAL forms of the 'word' (check suffix and then * prefix) */ while (snode) { /* find possible suffix */ suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ for (i = 0; i < suffix->naff; i++) { if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword)) { /* suffix success */ if (FindWord(Conf, newword, suffix->aff[i]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1)) { /* word search success */ *cur = pstrdup(newword); cur++; *cur = NULL; } /* now we will look changed word with prefixes */ pnode = Conf->Prefix; plevel = 0; swrdlen = strlen(newword); while (pnode) { prefix = FinfAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); if (!prefix) break; for (j = 0; j < prefix->naff; j++) { if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword)) { /* prefix success */ int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? 0 : prefix->aff[j]->flag; if (FindWord(Conf, pnewword, ff, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1)) { /* word search success */ *cur = pstrdup(pnewword); cur++; *cur = NULL; } } } pnode = prefix->node; } } } snode = suffix->node; } if (cur == forms) { pfree(forms); return (NULL); } return (forms);}typedef struct SplitVar{ int nstem; char **stem; struct SplitVar *next;} SplitVar;static intCheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len){ while ((*ptr)->affix) { if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) { len = (*ptr)->len; (*ptr)++; return len; } (*ptr)++; } return 0;}static SplitVar *CopyVar(SplitVar * s, int makedup){ SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar)); v->stem = (char **) palloc(sizeof(char *) * (MAX_NORM)); v->next = NULL; if (s) { int i; v->nstem = s->nstem; for (i = 0; i < s->nstem; i++) v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i]; } else v->nstem = 0; return v;}static SplitVar *SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos){ SplitVar *var = NULL; SPNodeData *StopLow, *StopHigh, *StopMiddle = NULL; SPNode *node = (snode) ? snode : Conf->Dictionary; int level = (snode) ? minpos : startpos; /* recursive * minpos==level */ int lenaff; CMPDAffix *caff; char *notprobed; notprobed = (char *) palloc(wordlen); memset(notprobed, 1, wordlen); var = CopyVar(orig, 1); while (node && level < wordlen) { StopLow = node->data; StopHigh = node->data + node->length; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); if (StopMiddle->val == ((uint8 *) (word))[level]) break; else if (StopMiddle->val < ((uint8 *) (word))[level]) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } if (StopLow >= StopHigh) break; /* find word with epenthetic */ caff = Conf->CompoundAffix; while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0) { /* * there is one of compound suffixes, so check word for existings */ char buf[MAXNORMLEN]; char **subres; lenaff = level - startpos + lenaff; if (!notprobed[startpos + lenaff - 1]) continue; if (level + lenaff - 1 <= minpos) continue; memcpy(buf, word + startpos, lenaff); buf[lenaff] = '\0'; subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX); if (subres) { /* Yes, it was a word from dictionary */ SplitVar *new = CopyVar(var, 0); SplitVar *ptr = var; char **sptr = subres; notprobed[startpos + lenaff - 1] = 0; while (*sptr) { new->stem[new->nstem] = *sptr; new->nstem++; sptr++; } pfree(subres); while (ptr->next) ptr = ptr->next; ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); pfree(new->stem); pfree(new); } } /* find infinitive */ if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level]) { /* ok, we found full compoundallowed word */ if (level > minpos) { /* and its length more than minimal */ if (wordlen == level + 1) { /* well, it was last word */ var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); var->nstem++; pfree(notprobed); return var; } else { /* then we will search more big word at the same point */ SplitVar *ptr = var; while (ptr->next) ptr = ptr->next; ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); /* we can find next word */ level++; var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos); var->nstem++; node = Conf->Dictionary; startpos = level; continue; } } } level++; node = StopMiddle->node; } var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); var->nstem++; pfree(notprobed); return var;}TSLexeme *NINormalizeWord(IspellDict * Conf, char *word){ char **res = NormalizeSubWord(Conf, word, 0); TSLexeme *lcur = NULL, *lres = NULL; uint16 NVariant = 1; if (res) { char **ptr = res; lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); while (*ptr) { lcur->lexeme = *ptr; lcur->flags = 0; lcur->nvariant = NVariant++; lcur++; ptr++; } lcur->lexeme = NULL; pfree(res); } if (Conf->compoundcontrol != '\t') { int wordlen = strlen(word); SplitVar *ptr, *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); int i; while (var) { if (var->nstem > 1) { char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDWORD); if (subres) { char **subptr = subres; if (!lcur) lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); while (*subptr) { for (i = 0; i < var->nstem - 1; i++) { lcur->lexeme = (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]); lcur->flags = 0; lcur->nvariant = NVariant; lcur++; } lcur->lexeme = *subptr; lcur->flags = 0; lcur->nvariant = NVariant; lcur++; subptr++; NVariant++; } lcur->lexeme = NULL; pfree(subres); var->stem[0] = NULL; pfree(var->stem[var->nstem - 1]); } } for (i = 0; i < var->nstem && var->stem[i]; i++) pfree(var->stem[i]); ptr = var->next; pfree(var->stem); pfree(var); var = ptr; } } return lres;}static voidfreeSPNode(SPNode * node){ SPNodeData *data; if (!node) return; data = node->data; while (node->length) { freeSPNode(data->node); data++; node->length--; } free(node);}static voidfreeANode(AffixNode * node){ AffixNodeData *data; if (!node) return; data = node->data; while (node->length) { freeANode(data->node); if (data->naff) free(data->aff); data++; node->length--; } free(node);}voidNIFree(IspellDict * Conf){ int i; AFFIX *Affix = (AFFIX *) Conf->Affix; char **aff = Conf->AffixData; if (aff) { while (*aff) { free(*aff); aff++; } free(Conf->AffixData); } for (i = 0; i < Conf->naffixes; i++) { if (Affix[i].compile == 0) { if (Affix[i].isregis) RS_free(&(Affix[i].reg.regis)); else pg_regfree(&(Affix[i].reg.regex)); } free(Affix[i].mask); free(Affix[i].find); free(Affix[i].repl); } if (Conf->Spell) { for (i = 0; i < Conf->nspell; i++) free(Conf->Spell[i].word); free(Conf->Spell); } if (Conf->Affix) free(Conf->Affix); if (Conf->CompoundAffix) free(Conf->CompoundAffix); freeSPNode(Conf->Dictionary); freeANode(Conf->Suffix); freeANode(Conf->Prefix); memset((void *) Conf, 0, sizeof(IspellDict)); return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -