📄 spell.c

📁 PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
	Affix->data->aff = (AFFIX **) malloc(sizeof(AFFIX *) * cnt);	MEMOUT(Affix->data->aff);	Affix->data->naff = (uint32) cnt;	cnt = 0;	for (i = start; i < end; i++)		if (Conf->Affix[i].replen == 0)		{			Affix->data->aff[cnt] = Conf->Affix + i;			cnt++;		}}voidNISortAffixes(IspellDict * Conf){	AFFIX	   *Affix;	size_t		i;	CMPDAffix  *ptr;	int			firstsuffix = -1;	if (Conf->naffixes > 1)		qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);	Conf->CompoundAffix = ptr = (CMPDAffix *) malloc(sizeof(CMPDAffix) * Conf->naffixes);	MEMOUT(Conf->CompoundAffix);	ptr->affix = NULL;	for (i = 0; i < Conf->naffixes; i++)	{		Affix = &(((AFFIX *) Conf->Affix)[i]);		if (Affix->type == FF_SUFFIX)		{			if (firstsuffix < 0)				firstsuffix = i;			if (Affix->flagflags & FF_COMPOUNDONLYAFX)			{				if (!ptr->affix ||					strbncmp((const unsigned char *) (ptr - 1)->affix,							 (const unsigned char *) Affix->repl,							 (ptr - 1)->len))				{					/* leave only unique and minimals suffixes */					ptr->affix = Affix->repl;					ptr->len = Affix->replen;					ptr++;				}			}		}	}	ptr->affix = NULL;	Conf->CompoundAffix = (CMPDAffix *) realloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));	Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);	Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);	mkVoidAffix(Conf, 1, firstsuffix);	mkVoidAffix(Conf, 0, firstsuffix);}static AffixNodeData *FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type){	AffixNodeData *StopLow,			   *StopHigh,			   *StopMiddle;	uint8		symbol;	if (node->isvoid)	{							/* search void affixes */		if (node->data->naff)			return node->data;		node = node->data->node;	}	while (node && *level < wrdlen)	{		StopLow = node->data;		StopHigh = node->data + node->length;		while (StopLow < StopHigh)		{			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);			symbol = GETWCHAR(word, wrdlen, *level, type);			if (StopMiddle->val == symbol)			{				(*level)++;				if (StopMiddle->naff)					return StopMiddle;				node = StopMiddle->node;				break;			}			else if (StopMiddle->val < symbol)				StopLow = StopMiddle + 1;			else				StopHigh = StopMiddle;		}		if (StopLow >= StopHigh)			break;	}	return NULL;}static char *CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword){	if (flagflags & FF_COMPOUNDONLYAFX)	{		if ((Affix->flagflags & FF_COMPOUNDONLYAFX) == 0)			return NULL;	}	else	{		if (Affix->flagflags & FF_COMPOUNDONLYAFX)			return NULL;	}	if (Affix->type == FF_SUFFIX)	{		strcpy(newword, word);		strcpy(newword + len - Affix->replen, Affix->find);	}	else	{		strcpy(newword, Affix->find);		strcat(newword, word + Affix->replen);	}	if (Affix->issimple)		return newword;	else if (Affix->isregis)	{		if (Affix->compile)		{			RS_compile(&(Affix->reg.regis), (Affix->type == FF_SUFFIX) ? 1 : 0, Affix->mask);			Affix->compile = 0;		}		if (RS_execute(&(Affix->reg.regis), newword, -1))			return newword;	}	else	{		regmatch_t	subs[2];	/* workaround for apache&linux */		int			err;		pg_wchar   *data;		size_t		data_len;		int			dat_len;		if (Affix->compile)		{			int			wmasklen,						masklen = strlen(Affix->mask);			pg_wchar   *mask;			mask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar));			wmasklen = pg_mb2wchar_with_len(Affix->mask, mask, masklen);			err = pg_regcomp(&(Affix->reg.regex), mask, wmasklen, REG_EXTENDED | REG_ICASE | REG_NOSUB);			pfree(mask);			if (err)			{				char		regerrstr[ERRSTRSIZE];				pg_regerror(err, &(Affix->reg.regex), regerrstr, ERRSTRSIZE);				elog(ERROR, "Regex error in '%s': %s", Affix->mask, regerrstr);			}			Affix->compile = 0;		}		/* Convert data string to wide characters */		dat_len = strlen(newword);		data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));		data_len = pg_mb2wchar_with_len(newword, data, dat_len);		if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 1, subs, 0)))		{			pfree(data);			return newword;		}		pfree(data);	}	return NULL;}static char **NormalizeSubWord(IspellDict * Conf, char *word, char flag){	AffixNodeData *suffix = NULL,			   *prefix = NULL;	int			slevel = 0,				plevel = 0;	int			wrdlen = strlen(word),				swrdlen;	char	  **forms;	char	  **cur;	char		newword[2 * MAXNORMLEN] = "";	char		pnewword[2 * MAXNORMLEN] = "";	AffixNode  *snode = Conf->Suffix,			   *pnode;	int			i,				j;	if (wrdlen > MAXNORMLEN)		return NULL;	strlower(word);	cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));	*cur = NULL;	/* Check that the word itself is normal form */	if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD))	{		*cur = pstrdup(word);		cur++;		*cur = NULL;	}	/* Find all other NORMAL forms of the 'word' (check only prefix) */	pnode = Conf->Prefix;	plevel = 0;	while (pnode)	{		prefix = FinfAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);		if (!prefix)			break;		for (j = 0; j < prefix->naff; j++)		{			if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword))			{				/* prefix success */				if (FindWord(Conf, newword, prefix->aff[j]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1))				{					/* word search success */					*cur = pstrdup(newword);					cur++;					*cur = NULL;				}			}		}		pnode = prefix->node;	}	/*	 * Find all other NORMAL forms of the 'word' (check suffix and then	 * prefix)	 */	while (snode)	{		/* find possible suffix */		suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);		if (!suffix)			break;		/* foreach suffix check affix */		for (i = 0; i < suffix->naff; i++)		{			if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword))			{				/* suffix success */				if (FindWord(Conf, newword, suffix->aff[i]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1))				{					/* word search success */					*cur = pstrdup(newword);					cur++;					*cur = NULL;				}				/* now we will look changed word with prefixes */				pnode = Conf->Prefix;				plevel = 0;				swrdlen = strlen(newword);				while (pnode)				{					prefix = FinfAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);					if (!prefix)						break;					for (j = 0; j < prefix->naff; j++)					{						if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword))						{							/* prefix success */							int			ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?							0 : prefix->aff[j]->flag;							if (FindWord(Conf, pnewword, ff, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1))							{								/* word search success */								*cur = pstrdup(pnewword);								cur++;								*cur = NULL;							}						}					}					pnode = prefix->node;				}			}		}		snode = suffix->node;	}	if (cur == forms)	{		pfree(forms);		return (NULL);	}	return (forms);}typedef struct SplitVar{	int			nstem;	char	  **stem;	struct SplitVar *next;}	SplitVar;static intCheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len){	while ((*ptr)->affix)	{		if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)		{			len = (*ptr)->len;			(*ptr)++;			return len;		}		(*ptr)++;	}	return 0;}static SplitVar *CopyVar(SplitVar * s, int makedup){	SplitVar   *v = (SplitVar *) palloc(sizeof(SplitVar));	v->stem = (char **) palloc(sizeof(char *) * (MAX_NORM));	v->next = NULL;	if (s)	{		int			i;		v->nstem = s->nstem;		for (i = 0; i < s->nstem; i++)			v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];	}	else		v->nstem = 0;	return v;}static SplitVar *SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos){	SplitVar   *var = NULL;	SPNodeData *StopLow,			   *StopHigh,			   *StopMiddle = NULL;	SPNode	   *node = (snode) ? snode : Conf->Dictionary;	int			level = (snode) ? minpos : startpos;	/* recursive														 * minpos==level */	int			lenaff;	CMPDAffix  *caff;	char	   *notprobed;	notprobed = (char *) palloc(wordlen);	memset(notprobed, 1, wordlen);	var = CopyVar(orig, 1);	while (node && level < wordlen)	{		StopLow = node->data;		StopHigh = node->data + node->length;		while (StopLow < StopHigh)		{			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);			if (StopMiddle->val == ((uint8 *) (word))[level])				break;			else if (StopMiddle->val < ((uint8 *) (word))[level])				StopLow = StopMiddle + 1;			else				StopHigh = StopMiddle;		}		if (StopLow >= StopHigh)			break;		/* find word with epenthetic */		caff = Conf->CompoundAffix;		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0)		{			/*			 * there is one of compound suffixes, so check word for existings			 */			char		buf[MAXNORMLEN];			char	  **subres;			lenaff = level - startpos + lenaff;			if (!notprobed[startpos + lenaff - 1])				continue;			if (level + lenaff - 1 <= minpos)				continue;			memcpy(buf, word + startpos, lenaff);			buf[lenaff] = '\0';			subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX);			if (subres)			{				/* Yes, it was a word from dictionary */				SplitVar   *new = CopyVar(var, 0);				SplitVar   *ptr = var;				char	  **sptr = subres;				notprobed[startpos + lenaff - 1] = 0;				while (*sptr)				{					new->stem[new->nstem] = *sptr;					new->nstem++;					sptr++;				}				pfree(subres);				while (ptr->next)					ptr = ptr->next;				ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);				pfree(new->stem);				pfree(new);			}		}		/* find infinitive */		if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])		{			/* ok, we found full compoundallowed word */			if (level > minpos)			{				/* and its length more than minimal */				if (wordlen == level + 1)				{					/* well, it was last word */					var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);					var->nstem++;					pfree(notprobed);					return var;				}				else				{					/* then we will search more big word at the same point */					SplitVar   *ptr = var;					while (ptr->next)						ptr = ptr->next;					ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);					/* we can find next word */					level++;					var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos);					var->nstem++;					node = Conf->Dictionary;					startpos = level;					continue;				}			}		}		level++;		node = StopMiddle->node;	}	var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);	var->nstem++;	pfree(notprobed);	return var;}TSLexeme *NINormalizeWord(IspellDict * Conf, char *word){	char	  **res = NormalizeSubWord(Conf, word, 0);	TSLexeme   *lcur = NULL,			   *lres = NULL;	uint16		NVariant = 1;	if (res)	{		char	  **ptr = res;		lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));		while (*ptr)		{			lcur->lexeme = *ptr;			lcur->flags = 0;			lcur->nvariant = NVariant++;			lcur++;			ptr++;		}		lcur->lexeme = NULL;		pfree(res);	}	if (Conf->compoundcontrol != '\t')	{		int			wordlen = strlen(word);		SplitVar   *ptr,				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);		int			i;		while (var)		{			if (var->nstem > 1)			{				char	  **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDWORD);				if (subres)				{					char	  **subptr = subres;					if (!lcur)						lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));					while (*subptr)					{						for (i = 0; i < var->nstem - 1; i++)						{							lcur->lexeme = (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]);							lcur->flags = 0;							lcur->nvariant = NVariant;							lcur++;						}						lcur->lexeme = *subptr;						lcur->flags = 0;						lcur->nvariant = NVariant;						lcur++;						subptr++;						NVariant++;					}					lcur->lexeme = NULL;					pfree(subres);					var->stem[0] = NULL;					pfree(var->stem[var->nstem - 1]);				}			}			for (i = 0; i < var->nstem && var->stem[i]; i++)				pfree(var->stem[i]);			ptr = var->next;			pfree(var->stem);			pfree(var);			var = ptr;		}	}	return lres;}static voidfreeSPNode(SPNode * node){	SPNodeData *data;	if (!node)		return;	data = node->data;	while (node->length)	{		freeSPNode(data->node);		data++;		node->length--;	}	free(node);}static voidfreeANode(AffixNode * node){	AffixNodeData *data;	if (!node)		return;	data = node->data;	while (node->length)	{		freeANode(data->node);		if (data->naff)			free(data->aff);		data++;		node->length--;	}	free(node);}voidNIFree(IspellDict * Conf){	int			i;	AFFIX	   *Affix = (AFFIX *) Conf->Affix;	char	  **aff = Conf->AffixData;	if (aff)	{		while (*aff)		{			free(*aff);			aff++;		}		free(Conf->AffixData);	}	for (i = 0; i < Conf->naffixes; i++)	{		if (Affix[i].compile == 0)		{			if (Affix[i].isregis)				RS_free(&(Affix[i].reg.regis));			else				pg_regfree(&(Affix[i].reg.regex));		}		free(Affix[i].mask);		free(Affix[i].find);		free(Affix[i].repl);	}	if (Conf->Spell)	{		for (i = 0; i < Conf->nspell; i++)			free(Conf->Spell[i].word);		free(Conf->Spell);	}	if (Conf->Affix)		free(Conf->Affix);	if (Conf->CompoundAffix)		free(Conf->CompoundAffix);	freeSPNode(Conf->Dictionary);	freeANode(Conf->Suffix);	freeANode(Conf->Prefix);	memset((void *) Conf, 0, sizeof(IspellDict));	return;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -