📄 spell.c

📁 PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "postgres.h"#include "spell.h"#define MAX_NORM 1024#define MAXNORMLEN 256#define ERRSTRSIZE	1024#define STRNCASECMP(x,y)		pg_strncasecmp(x, y, strlen(y))#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )#define GETCHAR(A,N,T)	  GETWCHAR( (A)->repl, (A)->replen, N, T )#define MEMOUT(X)  if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))static intcmpspell(const void *s1, const void *s2){	return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));}static intcmpspellaffix(const void *s1, const void *s2){	return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));}static voidstrlower(char *str){	unsigned char *ptr = (unsigned char *) str;	while (*ptr)	{		*ptr = tolower(*ptr);		ptr++;	}}static char *strnduplicate(char *s, int len){	char	   *d = (char *) palloc(len + 1);	memcpy(d, s, len);	d[len] = '\0';	return d;}/* backward string compare for suffix tree operations */static intstrbcmp(const unsigned char *s1, const unsigned char *s2){	int			l1 = strlen((const char *) s1) - 1,				l2 = strlen((const char *) s2) - 1;	while (l1 >= 0 && l2 >= 0)	{		if (s1[l1] < s2[l2])			return -1;		if (s1[l1] > s2[l2])			return 1;		l1--;		l2--;	}	if (l1 < l2)		return -1;	if (l1 > l2)		return 1;	return 0;}static intstrbncmp(const unsigned char *s1, const unsigned char *s2, size_t count){	int			l1 = strlen((const char *) s1) - 1,				l2 = strlen((const char *) s2) - 1,				l = count;	while (l1 >= 0 && l2 >= 0 && l > 0)	{		if (s1[l1] < s2[l2])			return -1;		if (s1[l1] > s2[l2])			return 1;		l1--;		l2--;		l--;	}	if (l == 0)		return 0;	if (l1 < l2)		return -1;	if (l1 > l2)		return 1;	return 0;}static intcmpaffix(const void *s1, const void *s2){	const AFFIX *a1 = (const AFFIX *) s1;	const AFFIX *a2 = (const AFFIX *) s2;	if (a1->type < a2->type)		return -1;	if (a1->type > a2->type)		return 1;	if (a1->type == FF_PREFIX)		return strcmp(a1->repl, a2->repl);	else		return strbcmp((const unsigned char *) a1->repl,					   (const unsigned char *) a2->repl);}intNIAddSpell(IspellDict * Conf, const char *word, const char *flag){	if (Conf->nspell >= Conf->mspell)	{		if (Conf->mspell)		{			Conf->mspell += 1024 * 20;			Conf->Spell = (SPELL *) realloc(Conf->Spell, Conf->mspell * sizeof(SPELL));		}		else		{			Conf->mspell = 1024 * 20;			Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));		}		MEMOUT(Conf->Spell);	}	Conf->Spell[Conf->nspell].word = strdup(word);	MEMOUT(Conf->Spell[Conf->nspell].word);	strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);	Conf->nspell++;	return (0);}intNIImportDictionary(IspellDict * Conf, const char *filename){	char		str[BUFSIZ];	FILE	   *dict;	if (!(dict = fopen(filename, "r")))		return (1);	while (fgets(str, sizeof(str), dict))	{		char	   *s;		const char *flag;		flag = NULL;		if ((s = strchr(str, '/')))		{			*s++ = '\0';			flag = s;			while (*s)			{				if (isprint((unsigned char) *s) &&					!isspace((unsigned char) *s))					s++;				else				{					*s = '\0';					break;				}			}		}		else			flag = "";		strlower(str);		/* Dont load words if first letter is not required */		/* It allows to optimize loading at  search time   */		s = str;		while (*s)		{			if (*s == '\r' || *s == '\n')				*s = '\0';			s++;		}		NIAddSpell(Conf, str, flag);	}	fclose(dict);	return (0);}static intFindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly){	SPNode	   *node = Conf->Dictionary;	SPNodeData *StopLow,			   *StopHigh,			   *StopMiddle;	uint8	   *ptr = (uint8 *) word;	while (node && *ptr)	{		StopLow = node->data;		StopHigh = node->data + node->length;		while (StopLow < StopHigh)		{			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);			if (StopMiddle->val == *ptr)			{				if (*(ptr + 1) == '\0' && StopMiddle->isword)				{					if (compoundonly && !StopMiddle->compoundallow)						return 0;					if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))						return 1;				}				node = StopMiddle->node;				ptr++;				break;			}			else if (StopMiddle->val < *ptr)				StopLow = StopMiddle + 1;			else				StopHigh = StopMiddle;		}		if (StopLow >= StopHigh)			break;	}	return 0;}intNIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type){	if (Conf->naffixes >= Conf->maffixes)	{		if (Conf->maffixes)		{			Conf->maffixes += 16;			Conf->Affix = (AFFIX *) realloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));		}		else		{			Conf->maffixes = 16;			Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));		}		MEMOUT(Conf->Affix);	}	if (strcmp(mask, ".") == 0)	{		Conf->Affix[Conf->naffixes].issimple = 1;		Conf->Affix[Conf->naffixes].isregis = 0;		Conf->Affix[Conf->naffixes].mask = strdup("");	}	else if (RS_isRegis(mask))	{		Conf->Affix[Conf->naffixes].issimple = 0;		Conf->Affix[Conf->naffixes].isregis = 1;		Conf->Affix[Conf->naffixes].mask = strdup(mask);	}	else	{		Conf->Affix[Conf->naffixes].issimple = 0;		Conf->Affix[Conf->naffixes].isregis = 0;		Conf->Affix[Conf->naffixes].mask = (char *) malloc(strlen(mask) + 2);		if (type == FF_SUFFIX)			sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);		else			sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);	}	MEMOUT(Conf->Affix[Conf->naffixes].mask);	Conf->Affix[Conf->naffixes].compile = 1;	Conf->Affix[Conf->naffixes].flagflags = flagflags;	Conf->Affix[Conf->naffixes].flag = flag;	Conf->Affix[Conf->naffixes].type = type;	Conf->Affix[Conf->naffixes].find = strdup(find);	MEMOUT(Conf->Affix[Conf->naffixes].find);	Conf->Affix[Conf->naffixes].repl = strdup(repl);	MEMOUT(Conf->Affix[Conf->naffixes].repl);	Conf->Affix[Conf->naffixes].replen = strlen(repl);	Conf->naffixes++;	return (0);}static char *remove_spaces(char *dist, char *src){	char	   *d,			   *s;	d = dist;	s = src;	while (*s)	{		if (*s != ' ' && *s != '-' && *s != '\t')		{			*d = *s;			d++;		}		s++;	}	*d = 0;	return (dist);}intNIImportAffixes(IspellDict * Conf, const char *filename){	char		str[BUFSIZ];	char		mask[BUFSIZ];	char		find[BUFSIZ];	char		repl[BUFSIZ];	char	   *s;	int			i;	int			suffixes = 0;	int			prefixes = 0;	int			flag = 0;	char		flagflags = 0;	FILE	   *affix;	if (!(affix = fopen(filename, "r")))		return (1);	Conf->compoundcontrol = '\t';	while (fgets(str, sizeof(str), affix))	{		if (STRNCASECMP(str, "compoundwords") == 0)		{			s = strchr(str, 'l');			if (s)			{				while (*s != ' ')					s++;				while (*s == ' ')					s++;				Conf->compoundcontrol = *s;				continue;			}		}		if (STRNCASECMP(str, "suffixes") == 0)		{			suffixes = 1;			prefixes = 0;			continue;		}		if (STRNCASECMP(str, "prefixes") == 0)		{			suffixes = 0;			prefixes = 1;			continue;		}		if (STRNCASECMP(str, "flag ") == 0)		{			s = str + 5;			flagflags = 0;			while (*s == ' ')				s++;			if (*s == '*')			{				flagflags |= FF_CROSSPRODUCT;				s++;			}			else if (*s == '~')			{				flagflags |= FF_COMPOUNDONLYAFX;				s++;			}			if (*s == '\\')				s++;			flag = (unsigned char) *s;			continue;		}		if ((!suffixes) && (!prefixes))			continue;		if ((s = strchr(str, '#')))			*s = 0;		if (!*str)			continue;		strlower(str);		strcpy(mask, "");		strcpy(find, "");		strcpy(repl, "");		i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl);		remove_spaces(str, repl);		strcpy(repl, str);		remove_spaces(str, find);		strcpy(find, str);		remove_spaces(str, mask);		strcpy(mask, str);		switch (i)		{			case 3:				break;			case 2:				if (*find != '\0')				{					strcpy(repl, find);					strcpy(find, "");				}				break;			default:				continue;		}		NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);	}	fclose(affix);	return (0);}static intMergeAffix(IspellDict * Conf, int a1, int a2){	int			naffix = 0;	char	  **ptr = Conf->AffixData;	while (*ptr)	{		naffix++;		ptr++;	}	Conf->AffixData = (char **) realloc(Conf->AffixData, (naffix + 2) * sizeof(char *));	MEMOUT(Conf->AffixData);	ptr = Conf->AffixData + naffix;	*ptr = malloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ );	MEMOUT(ptr);	sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);	ptr++;	*ptr = '\0';	return naffix;}static SPNode *mkSPNode(IspellDict * Conf, int low, int high, int level){	int			i;	int			nchar = 0;	char		lastchar = '\0';	SPNode	   *rs;	SPNodeData *data;	int			lownew = low;	for (i = low; i < high; i++)		if (Conf->Spell[i].p.d.len > level && lastchar != Conf->Spell[i].word[level])		{			nchar++;			lastchar = Conf->Spell[i].word[level];		}	if (!nchar)		return NULL;	rs = (SPNode *) malloc(SPNHRDSZ + nchar * sizeof(SPNodeData));	MEMOUT(rs);	memset(rs, 0, SPNHRDSZ + nchar * sizeof(SPNodeData));	rs->length = nchar;	data = rs->data;	lastchar = '\0';	for (i = low; i < high; i++)		if (Conf->Spell[i].p.d.len > level)		{			if (lastchar != Conf->Spell[i].word[level])			{				if (lastchar)				{					data->node = mkSPNode(Conf, lownew, i, level + 1);					lownew = i;					data++;				}				lastchar = Conf->Spell[i].word[level];			}			data->val = ((uint8 *) (Conf->Spell[i].word))[level];			if (Conf->Spell[i].p.d.len == level + 1)			{				if (data->isword && data->affix != Conf->Spell[i].p.d.affix)				{					/*					 * fprintf(stderr,"Word already exists: %s (affixes: '%s'					 * and '%s')\n", Conf->Spell[i].word,					 * Conf->AffixData[data->affix],					 * Conf->AffixData[Conf->Spell[i].p.d.affix] );					 */					/* MergeAffix called a few times */					data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);				}				else					data->affix = Conf->Spell[i].p.d.affix;				data->isword = 1;				if (strchr(Conf->AffixData[data->affix], Conf->compoundcontrol))					data->compoundallow = 1;			}		}	data->node = mkSPNode(Conf, lownew, high, level + 1);	return rs;}voidNISortDictionary(IspellDict * Conf){	size_t		i;	int			naffix = 3;	/* compress affixes */	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);	for (i = 1; i < Conf->nspell; i++)		if (strcmp(Conf->Spell[i].p.flag, Conf->Spell[i - 1].p.flag))			naffix++;	Conf->AffixData = (char **) malloc(naffix * sizeof(char *));	MEMOUT(Conf->AffixData);	memset(Conf->AffixData, 0, naffix * sizeof(char *));	naffix = 1;	Conf->AffixData[0] = strdup("");	MEMOUT(Conf->AffixData[0]);	Conf->AffixData[1] = strdup(Conf->Spell[0].p.flag);	MEMOUT(Conf->AffixData[1]);	Conf->Spell[0].p.d.affix = 1;	Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);	for (i = 1; i < Conf->nspell; i++)	{		if (strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]))		{			naffix++;			Conf->AffixData[naffix] = strdup(Conf->Spell[i].p.flag);			MEMOUT(Conf->AffixData[naffix]);		}		Conf->Spell[i].p.d.affix = naffix;		Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);	}	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);	Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);	for (i = 0; i < Conf->nspell; i++)		free(Conf->Spell[i].word);	free(Conf->Spell);	Conf->Spell = NULL;}static AffixNode *mkANode(IspellDict * Conf, int low, int high, int level, int type){	int			i;	int			nchar = 0;	uint8		lastchar = '\0';	AffixNode  *rs;	AffixNodeData *data;	int			lownew = low;	for (i = low; i < high; i++)		if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))		{			nchar++;			lastchar = GETCHAR(Conf->Affix + i, level, type);		}	if (!nchar)		return NULL;	rs = (AffixNode *) malloc(ANHRDSZ + nchar * sizeof(AffixNodeData));	MEMOUT(rs);	memset(rs, 0, ANHRDSZ + nchar * sizeof(AffixNodeData));	rs->length = nchar;	data = rs->data;	lastchar = '\0';	for (i = low; i < high; i++)		if (Conf->Affix[i].replen > level)		{			if (lastchar != GETCHAR(Conf->Affix + i, level, type))			{				if (lastchar)				{					data->node = mkANode(Conf, lownew, i, level + 1, type);					lownew = i;					data++;				}				lastchar = GETCHAR(Conf->Affix + i, level, type);			}			data->val = GETCHAR(Conf->Affix + i, level, type);			if (Conf->Affix[i].replen == level + 1)			{					/* affix stopped */				if (!data->naff)				{					data->aff = (AFFIX **) malloc(sizeof(AFFIX *) * (high - i + 1));					MEMOUT(data->aff);				}				data->aff[data->naff] = Conf->Affix + i;				data->naff++;			}		}	data->node = mkANode(Conf, lownew, high, level + 1, type);	return rs;}static voidmkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix){	int			i,				cnt = 0;	int			start = (issuffix) ? startsuffix : 0;	int			end = (issuffix) ? Conf->naffixes : startsuffix;	AffixNode  *Affix = (AffixNode *) malloc(ANHRDSZ + sizeof(AffixNodeData));	MEMOUT(Affix);	memset(Affix, 0, ANHRDSZ + sizeof(AffixNodeData));	Affix->length = 1;	Affix->isvoid = 1;	if (issuffix)	{		Affix->data->node = Conf->Suffix;		Conf->Suffix = Affix;	}	else	{		Affix->data->node = Conf->Prefix;		Conf->Prefix = Affix;	}	for (i = start; i < end; i++)		if (Conf->Affix[i].replen == 0)			cnt++;	if (cnt == 0)		return;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -