spell.c

来自「PostgreSQL7.4.6 for Linux」· C语言代码 · 共 657 行
657 行
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "postgres.h"#include "spell.h"#define MAXNORMLEN 56#define STRNCASECMP(x,y)		(strncasecmp(x,y,strlen(y)))static intcmpspell(const void *s1, const void *s2){	return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));}static voidstrlower(char *str){	unsigned char *ptr = (unsigned char *) str;	while (*ptr)	{		*ptr = tolower(*ptr);		ptr++;	}}/* backward string compaire for suffix tree operations */static intstrbcmp(const char *s1, const char *s2){	int			l1 = strlen(s1) - 1,				l2 = strlen(s2) - 1;	while (l1 >= 0 && l2 >= 0)	{		if (s1[l1] < s2[l2])			return -1;		if (s1[l1] > s2[l2])			return 1;		l1--;		l2--;	}	if (l1 < l2)		return -1;	if (l1 > l2)		return 1;	return 0;}static intstrbncmp(const char *s1, const char *s2, size_t count){	int			l1 = strlen(s1) - 1,				l2 = strlen(s2) - 1,				l = count;	while (l1 >= 0 && l2 >= 0 && l > 0)	{		if (s1[l1] < s2[l2])			return -1;		if (s1[l1] > s2[l2])			return 1;		l1--;		l2--;		l--;	}	if (l == 0)		return 0;	if (l1 < l2)		return -1;	if (l1 > l2)		return 1;	return 0;}static intcmpaffix(const void *s1, const void *s2){	if (((const AFFIX *) s1)->type < ((const AFFIX *) s2)->type)		return -1;	if (((const AFFIX *) s1)->type > ((const AFFIX *) s2)->type)		return 1;	if (((const AFFIX *) s1)->type == 'p')		return (strcmp(((const AFFIX *) s1)->repl, ((const AFFIX *) s2)->repl));	else		return (strbcmp(((const AFFIX *) s1)->repl, ((const AFFIX *) s2)->repl));}intAddSpell(IspellDict * Conf, const char *word, const char *flag){	if (Conf->nspell >= Conf->mspell)	{		if (Conf->mspell)		{			Conf->mspell += 1024 * 20;			Conf->Spell = (SPELL *) realloc(Conf->Spell, Conf->mspell * sizeof(SPELL));		}		else		{			Conf->mspell = 1024 * 20;			Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));		}		if (Conf->Spell == NULL)			ereport(ERROR,					(errcode(ERRCODE_OUT_OF_MEMORY),					 errmsg("out of memory")));	}	Conf->Spell[Conf->nspell].word = 
strdup(word);	if (!Conf->Spell[Conf->nspell].word)		ereport(ERROR,				(errcode(ERRCODE_OUT_OF_MEMORY),				 errmsg("out of memory")));	strncpy(Conf->Spell[Conf->nspell].flag, flag, 10);	Conf->nspell++;	return (0);}intImportDictionary(IspellDict * Conf, const char *filename){	unsigned char str[BUFSIZ];	FILE	   *dict;	if (!(dict = fopen(filename, "r")))		return (1);	while (fgets(str, sizeof(str), dict))	{		unsigned char *s;		const unsigned char *flag;		flag = NULL;		if ((s = strchr(str, '/')))		{			*s = 0;			s++;			flag = s;			while (*s)			{				if (((*s >= 'A') && (*s <= 'Z')) || ((*s >= 'a') && (*s <= 'z')))					s++;				else				{					*s = 0;					break;				}			}		}		else			flag = "";		strlower(str);		/* Dont load words if first letter is not required */		/* It allows to optimize loading at  search time   */		s = str;		while (*s)		{			if (*s == '\r')				*s = 0;			if (*s == '\n')				*s = 0;			s++;		}		AddSpell(Conf, str, flag);	}	fclose(dict);	return (0);}static SPELL *FindWord(IspellDict * Conf, const char *word, int affixflag){	int			l,				c,				r,				resc,				resl,				resr,				i;	i = (int) (*word) & 255;	l = Conf->SpellTree.Left[i];	r = Conf->SpellTree.Right[i];	if (l == -1)		return (NULL);	while (l <= r)	{		c = (l + r) >> 1;		resc = strcmp(Conf->Spell[c].word, word);		if ((resc == 0) &&			((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)))			return (&Conf->Spell[c]);		resl = strcmp(Conf->Spell[l].word, word);		if ((resl == 0) &&			((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)))			return (&Conf->Spell[l]);		resr = strcmp(Conf->Spell[r].word, word);		if ((resr == 0) &&			((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)))			return (&Conf->Spell[r]);		if (resc < 0)		{			l = c + 1;			r--;		}		else if (resc > 0)		{			r = c - 1;			l++;		}		else		{			l++;			r--;		}	}	return (NULL);}intAddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type){	if (Conf->naffixes >= 
Conf->maffixes)	{		if (Conf->maffixes)		{			Conf->maffixes += 16;			Conf->Affix = (AFFIX *) realloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));		}		else		{			Conf->maffixes = 16;			Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));		}		if (Conf->Affix == NULL)			ereport(ERROR,					(errcode(ERRCODE_OUT_OF_MEMORY),					 errmsg("out of memory")));	}	if (type == 's')		sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);	else		sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);	Conf->Affix[Conf->naffixes].compile = 1;	Conf->Affix[Conf->naffixes].flag = flag;	Conf->Affix[Conf->naffixes].type = type;	strcpy(Conf->Affix[Conf->naffixes].find, find);	strcpy(Conf->Affix[Conf->naffixes].repl, repl);	Conf->Affix[Conf->naffixes].replen = strlen(repl);	Conf->naffixes++;	return (0);}static char *remove_spaces(char *dist, char *src){	char	   *d,			   *s;	d = dist;	s = src;	while (*s)	{		if (*s != ' ' && *s != '-' && *s != '\t')		{			*d = *s;			d++;		}		s++;	}	*d = 0;	return (dist);}intImportAffixes(IspellDict * Conf, const char *filename){	unsigned char str[BUFSIZ];	unsigned char flag = 0;	unsigned char mask[BUFSIZ] = "";	unsigned char find[BUFSIZ] = "";	unsigned char repl[BUFSIZ] = "";	unsigned char *s;	int			i;	int			suffixes = 0;	int			prefixes = 0;	FILE	   *affix;	if (!(affix = fopen(filename, "r")))		return (1);	while (fgets(str, sizeof(str), affix))	{		if (!STRNCASECMP(str, "suffixes"))		{			suffixes = 1;			prefixes = 0;			continue;		}		if (!STRNCASECMP(str, "prefixes"))		{			suffixes = 0;			prefixes = 1;			continue;		}		if (!STRNCASECMP(str, "flag "))		{			s = str + 5;			while (strchr("* ", *s))				s++;			flag = *s;			continue;		}		if ((!suffixes) && (!prefixes))			continue;		if ((s = strchr(str, '#')))			*s = 0;		if (!*str)			continue;		strlower(str);		strcpy(mask, "");		strcpy(find, "");		strcpy(repl, "");		i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl);		remove_spaces(str, repl);		strcpy(repl, str);		remove_spaces(str, find);		
strcpy(find, str);		remove_spaces(str, mask);		strcpy(mask, str);		switch (i)		{			case 3:				break;			case 2:				if (*find != '\0')				{					strcpy(repl, find);					strcpy(find, "");				}				break;			default:				continue;		}		AddAffix(Conf, (int) flag, mask, find, repl, suffixes ? 's' : 'p');	}	fclose(affix);	return (0);}voidSortDictionary(IspellDict * Conf){	int			CurLet = -1,				Let;	size_t		i;	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);	for (i = 0; i < 256; i++)		Conf->SpellTree.Left[i] = -1;	for (i = 0; i < Conf->nspell; i++)	{		Let = (int) (*(Conf->Spell[i].word)) & 255;		if (CurLet != Let)		{			Conf->SpellTree.Left[Let] = i;			CurLet = Let;		}		Conf->SpellTree.Right[Let] = i;	}}voidSortAffixes(IspellDict * Conf){	int			CurLetP = -1,				CurLetS = -1,				Let;	AFFIX	   *Affix;	size_t		i;	if (Conf->naffixes > 1)		qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);	for (i = 0; i < 256; i++)	{		Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;		Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;	}	for (i = 0; i < Conf->naffixes; i++)	{		Affix = &(((AFFIX *) Conf->Affix)[i]);		if (Affix->type == 'p')		{			Let = (int) (*(Affix->repl)) & 255;			if (CurLetP != Let)			{				Conf->PrefixTree.Left[Let] = i;				CurLetP = Let;			}			Conf->PrefixTree.Right[Let] = i;		}		else		{			Let = (Affix->replen) ? 
(int) (Affix->repl[Affix->replen - 1]) & 255 : 0;			if (CurLetS != Let)			{				Conf->SuffixTree.Left[Let] = i;				CurLetS = Let;			}			Conf->SuffixTree.Right[Let] = i;		}	}}static char *CheckSuffix(const char *word, size_t len, AFFIX * Affix, int *res, IspellDict * Conf){	regmatch_t	subs[2];		/* workaround for apache&linux */	char		newword[2 * MAXNORMLEN] = "";	int			err;	*res = strbncmp(word, Affix->repl, Affix->replen);	if (*res < 0)		return NULL;	if (*res > 0)		return NULL;	strcpy(newword, word);	strcpy(newword + len - Affix->replen, Affix->find);	if (Affix->compile)	{		err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);		if (err)		{			/* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */			regfree(&(Affix->reg));			return (NULL);		}		Affix->compile = 0;	}	if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))	{		if (FindWord(Conf, newword, Affix->flag))			return pstrdup(newword);	}	return NULL;}#define NS 1#define MAX_NORM 512static intCheckPrefix(const char *word, size_t len, AFFIX * Affix, IspellDict * Conf, int pi,			char **forms, char ***cur){	regmatch_t	subs[NS * 2];	char		newword[2 * MAXNORMLEN] = "";	int			err,				ls,				res,				lres;	size_t		newlen;	AFFIX	   *CAffix = Conf->Affix;	res = strncmp(word, Affix->repl, Affix->replen);	if (res != 0)		return res;	strcpy(newword, Affix->find);	strcat(newword, word + Affix->replen);	if (Affix->compile)	{		err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);		if (err)		{			/* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */			regfree(&(Affix->reg));			return (0);		}		Affix->compile = 0;	}	if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))	{		SPELL	   *curspell;		if ((curspell = FindWord(Conf, newword, Affix->flag)))		{			if ((*cur - forms) < (MAX_NORM - 1))			{				**cur = pstrdup(newword);				(*cur)++;				**cur = NULL;			}		}		newlen = strlen(newword);		ls = Conf->SuffixTree.Left[pi];		if (ls >= 0 && ((*cur - forms) < (MAX_NORM - 1)))		
{			**cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);			if (**cur)			{				(*cur)++;				**cur = NULL;			}		}	}	return 0;}char	  **NormalizeWord(IspellDict * Conf, char *word){/*regmatch_t subs[NS];*/	size_t		len;	char	  **forms;	char	  **cur;	AFFIX	   *Affix;	int			ri,				pi,				ipi,				lp,				rp,				cp,				ls,				rs;	int			lres,				rres,				cres = 0;	SPELL	   *spell;	len = strlen(word);	if (len > MAXNORMLEN)		return (NULL);	strlower(word);	forms = (char **) palloc(MAX_NORM * sizeof(char **));	cur = forms;	*cur = NULL;	ri = (int) (*word) & 255;	pi = (int) (word[strlen(word) - 1]) & 255;	Affix = (AFFIX *) Conf->Affix;	/* Check that the word itself is normal form */	if ((spell = FindWord(Conf, word, 0)))	{		*cur = pstrdup(word);		cur++;		*cur = NULL;	}	/* Find all other NORMAL forms of the 'word' */	for (ipi = 0; ipi <= pi; ipi += pi)	{		/* check prefix */		lp = Conf->PrefixTree.Left[ri];		rp = Conf->PrefixTree.Right[ri];		while (lp >= 0 && lp <= rp)		{			cp = (lp + rp) >> 1;			cres = 0;			if ((cur - forms) < (MAX_NORM - 1))				cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);			if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))				lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);			if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))				rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);			if (cres < 0)			{				rp = cp - 1;				lp++;			}			else if (cres > 0)			{				lp = cp + 1;				rp--;			}			else			{				lp++;				rp--;			}		}		/* check suffix */		ls = Conf->SuffixTree.Left[ipi];		rs = Conf->SuffixTree.Right[ipi];		while (ls >= 0 && ls <= rs)		{			if (((cur - forms) < (MAX_NORM - 1)))			{				*cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);				if (*cur)				{					cur++;					*cur = NULL;				}			}			if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))			{				*cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);				if (*cur)				{					cur++;					*cur = NULL;				}			}			ls++;			rs--;		}						/* end while */	}		
					/* for ipi */	if (cur == forms)	{		pfree(forms);		return (NULL);	}	return (forms);}voidFreeIspell(IspellDict * Conf){	int			i;	AFFIX	   *Affix = (AFFIX *) Conf->Affix;	for (i = 0; i < Conf->naffixes; i++)	{		if (Affix[i].compile == 0)			regfree(&(Affix[i].reg));	}	for (i = 0; i < Conf->naffixes; i++)		free(Conf->Spell[i].word);	free(Conf->Affix);	free(Conf->Spell);	memset((void *) Conf, 0, sizeof(IspellDict));	return;}
spell.c - 源码说明

本页面展示了「PostgreSQL7.4.6 for Linux」中的 spell.c 源码文件，采用 C 语言编写，共 657 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?