tsquery.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 980 行 · 第 1/2 页
C
980 行
/*-------------------------------------------------------------------------
 *
 * tsquery.c
 *    I/O functions for tsquery
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.15.2.1 2008/04/11 22:52:17 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"


/*
 * Working state of the tsquery parser.  One instance is set up by
 * parse_tsquery and handed to the tokenizer (gettoken_query) and to the
 * push* functions while a query string is being parsed.
 */
struct TSQueryParserStateData
{
    /* State for gettoken_query */
    char       *buffer;         /* entire string we are scanning */
    char       *buf;            /* current scan point */
    int         state;          /* one of the WAIT* parser states defined
                                 * below; selects what the tokenizer accepts
                                 * next */
    int         count;          /* nesting count, incremented by (,
                                 * decremented by ) */

    /* polish (prefix) notation in list, filled in by push* functions */
    List       *polstr;

    /*
     * Strings from operands are collected in op. curop is a pointer to the
     * end of used space of op.
     */
    char       *op;
    char       *curop;
    int         lenop;          /* allocated size of op */
    int         sumlen;         /* used size of op */

    /* state for value's parser */
    TSVectorParseState valstate;
};

/*
 * parser's states
 *
 * WAITOPERAND        - an operand, '!' or '(' is expected next
 * WAITOPERATOR       - '&', '|', ')' or end of input is expected next
 * WAITFIRSTOPERAND   - same as WAITOPERAND, except that failing to find an
 *                      operand ends the parse instead of raising an error
 * WAITSINGLEOPERAND  - the rest of the input is taken as one single operand
 */
#define WAITOPERAND 1
#define WAITOPERATOR 2
#define WAITFIRSTOPERAND 3
#define WAITSINGLEOPERAND 4

/*
 * subroutine to parse the weight part, like ':1AB' of a query.
*/static char *get_weight(char *buf, int16 *weight){ *weight = 0; if (!t_iseq(buf, ':')) return buf; buf++; while (*buf && pg_mblen(buf) == 1) { switch (*buf) { case 'a': case 'A': *weight |= 1 << 3; break; case 'b': case 'B': *weight |= 1 << 2; break; case 'c': case 'C': *weight |= 1 << 1; break; case 'd': case 'D': *weight |= 1; break; default: return buf; } buf++; } return buf;}/* * token types for parsing */typedef enum{ PT_END = 0, PT_ERR = 1, PT_VAL = 2, PT_OPR = 3, PT_OPEN = 4, PT_CLOSE = 5} ts_tokentype;/* * get token from query string * * *operator is filled in with OP_* when return values is PT_OPR * *strval, *lenval and *weight are filled in when return value is PT_VAL */static ts_tokentypegettoken_query(TSQueryParserState state, int8 *operator, int *lenval, char **strval, int16 *weight){ while (1) { switch (state->state) { case WAITFIRSTOPERAND: case WAITOPERAND: if (t_iseq(state->buf, '!')) { (state->buf)++; /* can safely ++, t_iseq guarantee * that pg_mblen()==1 */ *operator = OP_NOT; state->state = WAITOPERAND; return PT_OPR; } else if (t_iseq(state->buf, '(')) { state->count++; (state->buf)++; state->state = WAITOPERAND; return PT_OPEN; } else if (t_iseq(state->buf, ':')) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error in tsquery: \"%s\"", state->buffer))); } else if (!t_isspace(state->buf)) { /* * We rely on the tsvector parser to parse the value for * us */ reset_tsvector_parser(state->valstate, state->buf); if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf)) { state->buf = get_weight(state->buf, weight); state->state = WAITOPERATOR; return PT_VAL; } else if (state->state == WAITFIRSTOPERAND) return PT_END; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("no operand in tsquery: \"%s\"", state->buffer))); } break; case WAITOPERATOR: if (t_iseq(state->buf, '&')) { state->state = WAITOPERAND; *operator = OP_AND; (state->buf)++; return PT_OPR; } if (t_iseq(state->buf, '|')) { state->state = 
WAITOPERAND; *operator = OP_OR; (state->buf)++; return PT_OPR; } else if (t_iseq(state->buf, ')')) { (state->buf)++; state->count--; return (state->count < 0) ? PT_ERR : PT_CLOSE; } else if (*(state->buf) == '\0') return (state->count) ? PT_ERR : PT_END; else if (!t_isspace(state->buf)) return PT_ERR; break; case WAITSINGLEOPERAND: if (*(state->buf) == '\0') return PT_END; *strval = state->buf; *lenval = strlen(state->buf); state->buf += strlen(state->buf); state->count++; return PT_VAL; default: return PT_ERR; break; } state->buf += pg_mblen(state->buf); } return PT_END;}/* * Push an operator to state->polstr */voidpushOperator(TSQueryParserState state, int8 oper){ QueryOperator *tmp; Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR); tmp = (QueryOperator *) palloc0(sizeof(QueryOperator)); tmp->type = QI_OPR; tmp->oper = oper; /* left is filled in later with findoprnd */ state->polstr = lcons(tmp, state->polstr);}static voidpushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight){ QueryOperand *tmp; if (distance >= MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("value is too big in tsquery: \"%s\"", state->buffer))); if (lenval >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("operand is too long in tsquery: \"%s\"", state->buffer))); tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); tmp->type = QI_VAL; tmp->weight = weight; tmp->valcrc = (int32) valcrc; tmp->length = lenval; tmp->distance = distance; state->polstr = lcons(tmp, state->polstr);}/* * Push an operand to state->polstr. * * strval must point to a string equal to state->curop. lenval is the length * of the string. 
*/voidpushValue(TSQueryParserState state, char *strval, int lenval, int2 weight){ pg_crc32 valcrc; if (lenval >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long in tsquery: \"%s\"", state->buffer))); INIT_CRC32(valcrc); COMP_CRC32(valcrc, strval, lenval); FIN_CRC32(valcrc); pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight); /* append the value string to state.op, enlarging buffer if needed first */ while (state->curop - state->op + lenval + 1 >= state->lenop) { int used = state->curop - state->op; state->lenop *= 2; state->op = (char *) repalloc((void *) state->op, state->lenop); state->curop = state->op + used; } memcpy((void *) state->curop, (void *) strval, lenval); state->curop += lenval; *(state->curop) = '\0'; state->curop++; state->sumlen += lenval + 1 /* \0 */ ;}/* * Push a stopword placeholder to state->polstr */voidpushStop(TSQueryParserState state){ QueryOperand *tmp; tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); tmp->type = QI_VALSTOP; state->polstr = lcons(tmp, state->polstr);}#define STACKDEPTH 32/* * Make polish (prefix) notation of query. * * See parse_tsquery for explanation of pushval. 
*/static voidmakepol(TSQueryParserState state, PushFunction pushval, Datum opaque){ int8 operator = 0; ts_tokentype type; int lenval = 0; char *strval = NULL; int8 opstack[STACKDEPTH]; int lenstack = 0; int16 weight = 0; /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END) { switch (type) { case PT_VAL: pushval(opaque, state, strval, lenval, weight); while (lenstack && (opstack[lenstack - 1] == OP_AND || opstack[lenstack - 1] == OP_NOT)) { lenstack--; pushOperator(state, opstack[lenstack]); } break; case PT_OPR: if (lenstack && operator == OP_OR) pushOperator(state, OP_OR); else { if (lenstack == STACKDEPTH) /* internal error */ elog(ERROR, "tsquery stack too small"); opstack[lenstack] = operator; lenstack++; } break; case PT_OPEN: makepol(state, pushval, opaque); if (lenstack && (opstack[lenstack - 1] == OP_AND || opstack[lenstack - 1] == OP_NOT)) { lenstack--; pushOperator(state, opstack[lenstack]); } break; case PT_CLOSE: while (lenstack) { lenstack--; pushOperator(state, opstack[lenstack]); }; return; case PT_ERR: default: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error in tsquery: \"%s\"", state->buffer))); } } while (lenstack) { lenstack--; pushOperator(state, opstack[lenstack]); }}static voidfindoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes){ /* since this function recurses, it could be driven to stack overflow. */ check_stack_depth(); if (*pos >= nnodes) elog(ERROR, "malformed tsquery: operand not found"); if (ptr[*pos].type == QI_VAL || ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here, they * haven't been cleaned away yet. 
*/ { (*pos)++; } else { Assert(ptr[*pos].type == QI_OPR); if (ptr[*pos].operator.oper == OP_NOT) { ptr[*pos].operator.left = 1; (*pos)++; findoprnd_recurse(ptr, pos, nnodes); } else { QueryOperator *curitem = &ptr[*pos].operator; int tmp = *pos; Assert(curitem->oper == OP_AND || curitem->oper == OP_OR); (*pos)++; findoprnd_recurse(ptr, pos, nnodes); curitem->left = *pos - tmp; findoprnd_recurse(ptr, pos, nnodes); } }}/* * Fills in the left-fields previously left unfilled. The input * QueryItems must be in polish (prefix) notation. */static voidfindoprnd(QueryItem *ptr, int size){ uint32 pos; pos = 0; findoprnd_recurse(ptr, &pos, size); if (pos != size) elog(ERROR, "malformed tsquery: extra nodes");}/* * Each value (operand) in the query is be passed to pushval. pushval can * transform the simple value to an arbitrarily complex expression using * pushValue and pushOperator. It must push a single value with pushValue, * a complete expression with all operands, or a a stopword placeholder * with pushStop, otherwise the prefix notation representation will be broken, * having an operator with no operand. * * opaque is passed on to pushval as is, pushval can use it to store its * private state. * * The returned query might contain QI_STOPVAL nodes. The caller is responsible * for cleaning them up (with clean_fakeval) */TSQueryparse_tsquery(char *buf, PushFunction pushval, Datum opaque, bool isplain){ struct TSQueryParserStateData state; int i; TSQuery query; int commonlen; QueryItem *ptr; ListCell *cell; /* init state */ state.buffer = buf; state.buf = buf; state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; state.count = 0; state.polstr = NIL; /* init value parser's state */ state.valstate = init_tsvector_parser(state.buffer, true, true); /* init list of operand */ state.sumlen = 0; state.lenop = 64; state.curop = state.op = (char *) palloc(state.lenop);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?