tsquery.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 980 行 · 第 1/2 页

C
980
字号
/*-------------------------------------------------------------------------
 *
 * tsquery.c
 *	  I/O functions for tsquery
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.15.2.1 2008/04/11 22:52:17 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"


/*
 * Working state of the tsquery parser.  One instance is set up by
 * parse_tsquery and threaded through gettoken_query, makepol and the
 * push* functions.
 */
struct TSQueryParserStateData
{
	/* State for gettoken_query */
	char	   *buffer;			/* entire string we are scanning */
	char	   *buf;			/* current scan point */
	int			state;			/* one of the WAIT* parser states below */
	int			count;			/* nesting count, incremented by (,
								 * decremented by ) */

	/* polish (prefix) notation in list, filled in by push* functions */
	List	   *polstr;

	/*
	 * Strings from operands are collected in op. curop is a pointer to the
	 * end of used space of op.
	 */
	char	   *op;
	char	   *curop;
	int			lenop;			/* allocated size of op */
	int			sumlen;			/* used size of op */

	/* state for value's parser */
	TSVectorParseState valstate;
};

/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR	2
#define WAITFIRSTOPERAND 3
#define WAITSINGLEOPERAND 4

/*
 * subroutine to parse the weight part, like ':AB' of a query.
*/static char *get_weight(char *buf, int16 *weight){	*weight = 0;	if (!t_iseq(buf, ':'))		return buf;	buf++;	while (*buf && pg_mblen(buf) == 1)	{		switch (*buf)		{			case 'a':			case 'A':				*weight |= 1 << 3;				break;			case 'b':			case 'B':				*weight |= 1 << 2;				break;			case 'c':			case 'C':				*weight |= 1 << 1;				break;			case 'd':			case 'D':				*weight |= 1;				break;			default:				return buf;		}		buf++;	}	return buf;}/* * token types for parsing */typedef enum{	PT_END = 0,	PT_ERR = 1,	PT_VAL = 2,	PT_OPR = 3,	PT_OPEN = 4,	PT_CLOSE = 5} ts_tokentype;/* * get token from query string * * *operator is filled in with OP_* when return values is PT_OPR * *strval, *lenval and *weight are filled in when return value is PT_VAL */static ts_tokentypegettoken_query(TSQueryParserState state,			   int8 *operator,			   int *lenval, char **strval, int16 *weight){	while (1)	{		switch (state->state)		{			case WAITFIRSTOPERAND:			case WAITOPERAND:				if (t_iseq(state->buf, '!'))				{					(state->buf)++;		/* can safely ++, t_iseq guarantee										 * that pg_mblen()==1 */					*operator = OP_NOT;					state->state = WAITOPERAND;					return PT_OPR;				}				else if (t_iseq(state->buf, '('))				{					state->count++;					(state->buf)++;					state->state = WAITOPERAND;					return PT_OPEN;				}				else if (t_iseq(state->buf, ':'))				{					ereport(ERROR,							(errcode(ERRCODE_SYNTAX_ERROR),							 errmsg("syntax error in tsquery: \"%s\"",									state->buffer)));				}				else if (!t_isspace(state->buf))				{					/*					 * We rely on the tsvector parser to parse the value for					 * us					 */					reset_tsvector_parser(state->valstate, state->buf);					if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))					{						state->buf = get_weight(state->buf, weight);						state->state = WAITOPERATOR;						return PT_VAL;					}					else if (state->state == WAITFIRSTOPERAND)						return PT_END;					else						ereport(ERROR,								(errcode(ERRCODE_SYNTAX_ERROR),			
					 errmsg("no operand in tsquery: \"%s\"",										state->buffer)));				}				break;			case WAITOPERATOR:				if (t_iseq(state->buf, '&'))				{					state->state = WAITOPERAND;					*operator = OP_AND;					(state->buf)++;					return PT_OPR;				}				if (t_iseq(state->buf, '|'))				{					state->state = WAITOPERAND;					*operator = OP_OR;					(state->buf)++;					return PT_OPR;				}				else if (t_iseq(state->buf, ')'))				{					(state->buf)++;					state->count--;					return (state->count < 0) ? PT_ERR : PT_CLOSE;				}				else if (*(state->buf) == '\0')					return (state->count) ? PT_ERR : PT_END;				else if (!t_isspace(state->buf))					return PT_ERR;				break;			case WAITSINGLEOPERAND:				if (*(state->buf) == '\0')					return PT_END;				*strval = state->buf;				*lenval = strlen(state->buf);				state->buf += strlen(state->buf);				state->count++;				return PT_VAL;			default:				return PT_ERR;				break;		}		state->buf += pg_mblen(state->buf);	}	return PT_END;}/* * Push an operator to state->polstr */voidpushOperator(TSQueryParserState state, int8 oper){	QueryOperator *tmp;	Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);	tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));	tmp->type = QI_OPR;	tmp->oper = oper;	/* left is filled in later with findoprnd */	state->polstr = lcons(tmp, state->polstr);}static voidpushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight){	QueryOperand *tmp;	if (distance >= MAXSTRPOS)		ereport(ERROR,				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),				 errmsg("value is too big in tsquery: \"%s\"",						state->buffer)));	if (lenval >= MAXSTRLEN)		ereport(ERROR,				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),				 errmsg("operand is too long in tsquery: \"%s\"",						state->buffer)));	tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));	tmp->type = QI_VAL;	tmp->weight = weight;	tmp->valcrc = (int32) valcrc;	tmp->length = lenval;	tmp->distance = distance;	state->polstr = lcons(tmp, state->polstr);}/* 
* Push an operand to state->polstr. * * strval must point to a string equal to state->curop. lenval is the length * of the string. */voidpushValue(TSQueryParserState state, char *strval, int lenval, int2 weight){	pg_crc32	valcrc;	if (lenval >= MAXSTRLEN)		ereport(ERROR,				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),				 errmsg("word is too long in tsquery: \"%s\"",						state->buffer)));	INIT_CRC32(valcrc);	COMP_CRC32(valcrc, strval, lenval);	FIN_CRC32(valcrc);	pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);	/* append the value string to state.op, enlarging buffer if needed first */	while (state->curop - state->op + lenval + 1 >= state->lenop)	{		int			used = state->curop - state->op;		state->lenop *= 2;		state->op = (char *) repalloc((void *) state->op, state->lenop);		state->curop = state->op + used;	}	memcpy((void *) state->curop, (void *) strval, lenval);	state->curop += lenval;	*(state->curop) = '\0';	state->curop++;	state->sumlen += lenval + 1 /* \0 */ ;}/* * Push a stopword placeholder to state->polstr */voidpushStop(TSQueryParserState state){	QueryOperand *tmp;	tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));	tmp->type = QI_VALSTOP;	state->polstr = lcons(tmp, state->polstr);}#define STACKDEPTH	32/* * Make polish (prefix) notation of query. * * See parse_tsquery for explanation of pushval. 
*/static voidmakepol(TSQueryParserState state,		PushFunction pushval,		Datum opaque){	int8		operator = 0;	ts_tokentype type;	int			lenval = 0;	char	   *strval = NULL;	int8		opstack[STACKDEPTH];	int			lenstack = 0;	int16		weight = 0;	/* since this function recurses, it could be driven to stack overflow */	check_stack_depth();	while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)	{		switch (type)		{			case PT_VAL:				pushval(opaque, state, strval, lenval, weight);				while (lenstack && (opstack[lenstack - 1] == OP_AND ||									opstack[lenstack - 1] == OP_NOT))				{					lenstack--;					pushOperator(state, opstack[lenstack]);				}				break;			case PT_OPR:				if (lenstack && operator == OP_OR)					pushOperator(state, OP_OR);				else				{					if (lenstack == STACKDEPTH) /* internal error */						elog(ERROR, "tsquery stack too small");					opstack[lenstack] = operator;					lenstack++;				}				break;			case PT_OPEN:				makepol(state, pushval, opaque);				if (lenstack && (opstack[lenstack - 1] == OP_AND ||								 opstack[lenstack - 1] == OP_NOT))				{					lenstack--;					pushOperator(state, opstack[lenstack]);				}				break;			case PT_CLOSE:				while (lenstack)				{					lenstack--;					pushOperator(state, opstack[lenstack]);				};				return;			case PT_ERR:			default:				ereport(ERROR,						(errcode(ERRCODE_SYNTAX_ERROR),						 errmsg("syntax error in tsquery: \"%s\"",								state->buffer)));		}	}	while (lenstack)	{		lenstack--;		pushOperator(state, opstack[lenstack]);	}}static voidfindoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes){	/* since this function recurses, it could be driven to stack overflow. */	check_stack_depth();	if (*pos >= nnodes)		elog(ERROR, "malformed tsquery: operand not found");	if (ptr[*pos].type == QI_VAL ||		ptr[*pos].type == QI_VALSTOP)	/* need to handle VALSTOP here, they										 * haven't been cleaned away yet. 
*/	{		(*pos)++;	}	else	{		Assert(ptr[*pos].type == QI_OPR);		if (ptr[*pos].operator.oper == OP_NOT)		{			ptr[*pos].operator.left = 1;			(*pos)++;			findoprnd_recurse(ptr, pos, nnodes);		}		else		{			QueryOperator *curitem = &ptr[*pos].operator;			int			tmp = *pos;			Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);			(*pos)++;			findoprnd_recurse(ptr, pos, nnodes);			curitem->left = *pos - tmp;			findoprnd_recurse(ptr, pos, nnodes);		}	}}/* * Fills in the left-fields previously left unfilled. The input * QueryItems must be in polish (prefix) notation. */static voidfindoprnd(QueryItem *ptr, int size){	uint32		pos;	pos = 0;	findoprnd_recurse(ptr, &pos, size);	if (pos != size)		elog(ERROR, "malformed tsquery: extra nodes");}/* * Each value (operand) in the query is be passed to pushval. pushval can * transform the simple value to an arbitrarily complex expression using * pushValue and pushOperator. It must push a single value with pushValue, * a complete expression with all operands, or a a stopword placeholder * with pushStop, otherwise the prefix notation representation will be broken, * having an operator with no operand. * * opaque is passed on to pushval as is, pushval can use it to store its * private state. * * The returned query might contain QI_STOPVAL nodes. The caller is responsible * for cleaning them up (with clean_fakeval) */TSQueryparse_tsquery(char *buf,			  PushFunction pushval,			  Datum opaque,			  bool isplain){	struct TSQueryParserStateData state;	int			i;	TSQuery		query;	int			commonlen;	QueryItem  *ptr;	ListCell   *cell;	/* init state */	state.buffer = buf;	state.buf = buf;	state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;	state.count = 0;	state.polstr = NIL;	/* init value parser's state */	state.valstate = init_tsvector_parser(state.buffer, true, true);	/* init list of operand */	state.sumlen = 0;	state.lenop = 64;	state.curop = state.op = (char *) palloc(state.lenop);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?