tsvector_op.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 1,439 行 · 第 1/3 页

C
1,439
字号
/*-------------------------------------------------------------------------
 *
 * tsvector_op.c
 *	  operations over tsvector
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.12.2.1 2008/04/08 18:20:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "catalog/namespace.h"
#include "commands/trigger.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"


/*
 * Working state holding two WordEntry pointers and two character pointers.
 * Not referenced by any function in this part of the file.
 * NOTE(review): arrb/arre look like the begin/end of a WordEntry array and
 * values/operand like string bases -- confirm against the code that uses it.
 */
typedef struct
{
	WordEntry  *arrb;
	WordEntry  *arre;
	char	   *values;
	char	   *operand;
} CHKVAL;

/*
 * A cursor (cur) paired with a TSVector (stat).  Not referenced by any
 * function in this part of the file.
 */
typedef struct
{
	uint32		cur;
	TSVector	stat;
} StatStorage;

/*
 * One fixed-size record of the statistics array addressed by STATPTR().
 * NOTE(review): len/pos presumably locate a string inside the area returned
 * by STATSTRPTR(), and ndoc/nentry are presumably counters -- confirm.
 */
typedef struct
{
	uint32		len;
	uint32		pos;
	uint32		ndoc;
	uint32		nentry;
} StatEntry;

/*
 * Variable-length container: a varlena header, two int4 fields and trailing
 * data bytes.
 */
typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int4		size;
	int4		weight;
	char		data[1];
} tsstat;

/* Fixed header size used by the STAT* layout macros: four int4 slots. */
#define STATHDRSIZE (sizeof(int4) * 4)
/* Total bytes for a header, x StatEntry records and lenstr bytes of strings. */
#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
/* Address of the StatEntry array, which starts right after the header. */
#define STATPTR(x)	( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
/* Address of the string area, which starts right after the StatEntry array. */
#define STATSTRPTR(x)	( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((TSVector)(x))->size ) )
/* Size in bytes of the string area: whatever VARSIZE leaves after header and array. */
#define STATSTRSIZE(x)	( VARSIZE((TSVector)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((TSVector)(x))->size ) )


static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);


/*
 * Check if datatype is the specified type or equivalent to it.
 *
 * Note: we could just do getBaseType() unconditionally, but since that's
 * a relatively expensive catalog lookup that most users won't need, we
 * try the straight comparison first.
 *
 * Returns true if typid equals expected_type either directly or after
 * being passed through getBaseType(); false otherwise.
 */
static bool
is_expected_type(Oid typid, Oid expected_type)
{
	if (typid == expected_type)
		return true;
	typid = getBaseType(typid);
	if (typid == expected_type)
		return true;
	return false;
}

/*
 * Check if datatype is TEXT or binary-equivalent to it
 *
 * Returns true if typid is TEXTOID, VARCHAROID or BPCHAROID, either directly
 * or after being passed through getBaseType(); false otherwise.
 */
static bool
is_text_type(Oid typid)
{
	/* varchar(n) and char(n) are binary-compatible with text */
	if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
		return true;
	/* Allow domains over these types, too */
	typid = getBaseType(typid);
	if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
		return true;
	return false;
}


/*
 * Total-order comparison of two tsvectors; returns <0, 0 or >0.
 *
 * The vectors are compared first by VARSIZE, then by number of entries
 * (smaller sorts first in both cases).  If those are equal the entries are
 * compared pairwise in array order:
 *
 * Order: haspos, len, word, for all positions (pos, weight)
 *
 * Beware that for haspos, len, number of positions, pos and weight the
 * LARGER value sorts first (the function returns -1 when a's value is
 * greater).  Only the lexeme bytes use the plain strncmp() result.
 */
static int
silly_cmp_tsvector(const TSVector a, const TSVector b)
{
	if (VARSIZE(a) < VARSIZE(b))
		return -1;
	else if (VARSIZE(a) > VARSIZE(b))
		return 1;
	else if (a->size < b->size)
		return -1;
	else if (a->size > b->size)
		return 1;
	else
	{
		WordEntry  *aptr = ARRPTR(a);
		WordEntry  *bptr = ARRPTR(b);
		int			i = 0;
		int			res;

		for (i = 0; i < a->size; i++)
		{
			if (aptr->haspos != bptr->haspos)
			{
				return (aptr->haspos > bptr->haspos) ? -1 : 1;
			}
			else if (aptr->len != bptr->len)
			{
				return (aptr->len > bptr->len) ? -1 : 1;
			}
			/* lengths are equal here, so bptr->len bounds both strings */
			else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
			{
				return res;
			}
			else if (aptr->haspos)
			{
				/* same lexeme, both with positions: compare position lists */
				WordEntryPos *ap = POSDATAPTR(a, aptr);
				WordEntryPos *bp = POSDATAPTR(b, bptr);
				int			j;

				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;

				for (j = 0; j < POSDATALEN(a, aptr); j++)
				{
					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
					{
						return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
					}
					else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
					{
						return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
					}
					ap++, bp++;
				}
			}

			aptr++;
			bptr++;
		}
	}

	return 0;
}

/*
 * Generate a two-argument SQL-callable function tsvector_<type>.
 *
 * The generated function fetches both tsvector arguments, compares them with
 * silly_cmp_tsvector(), releases any argument copies, and returns
 * "res action 0" through PG_RETURN_<ret>.  For the "cmp" instantiation the
 * action is "+", so the raw comparison result is returned as an int32.
 */
#define TSVECTORCMPFUNC( type, action, ret )			\
Datum													\
tsvector_##type(PG_FUNCTION_ARGS)						\
{														\
	TSVector	a = PG_GETARG_TSVECTOR(0);				\
	TSVector	b = PG_GETARG_TSVECTOR(1);				\
	int			res = silly_cmp_tsvector(a, b);			\
	PG_FREE_IF_COPY(a,0);								\
	PG_FREE_IF_COPY(b,1);								\
	PG_RETURN_##ret( res action 0 );					\
}

TSVECTORCMPFUNC(lt, <, BOOL);
TSVECTORCMPFUNC(le, <=, BOOL);
TSVECTORCMPFUNC(eq, ==, BOOL);
TSVECTORCMPFUNC(ge, >=, BOOL);
TSVECTORCMPFUNC(gt, >, BOOL);
TSVECTORCMPFUNC(ne, !=, BOOL);
TSVECTORCMPFUNC(cmp, +, INT32);

/*
 * tsvector_strip
 *
 * Argument 0: a tsvector.
 * Returns a newly allocated tsvector holding the same lexemes, in the same
 * order, with haspos cleared on every entry (no position data is copied).
 */
Datum
tsvector_strip(PG_FUNCTION_ARGS)
{
	TSVector	in = PG_GETARG_TSVECTOR(0);
	TSVector	out;
	int			i,
				len = 0;
	WordEntry  *arrin = ARRPTR(in),
			   *arrout;
	char	   *cur;

	/* total bytes of lexeme text, used to size the result */
	for (i = 0; i < in->size; i++)
		len += arrin[i].len;

	len = CALCDATASIZE(in->size, len);
	out = (TSVector) palloc0(len);
	SET_VARSIZE(out, len);
	out->size = in->size;
	arrout = ARRPTR(out);
	cur = STRPTR(out);
	for (i = 0; i < in->size; i++)
	{
		/* lexemes are packed back to back; pos is the offset from STRPTR(out) */
		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
		arrout[i].haspos = 0;
		arrout[i].len = arrin[i].len;
		arrout[i].pos = cur - STRPTR(out);
		cur += arrout[i].len;
	}

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}

/*
 * tsvector_length
 *
 * Argument 0: a tsvector.
 * Returns its entry count (the size field) as an int32.
 */
Datum
tsvector_length(PG_FUNCTION_ARGS)
{
	TSVector	in = PG_GETARG_TSVECTOR(0);
	int4		ret = in->size;

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_INT32(ret);
}

/*
 * tsvector_setweight
 *
 * Argument 0: a tsvector.
 * Argument 1: a weight letter; 'A'/'a' -> 3, 'B'/'b' -> 2, 'C'/'c' -> 1,
 *			   'D'/'d' -> 0.  Any other character raises an ERROR.
 *
 * Returns a copy of the input in which every stored position has had its
 * weight set to the numeric value selected above.
 */
Datum
tsvector_setweight(PG_FUNCTION_ARGS)
{
	TSVector	in = PG_GETARG_TSVECTOR(0);
	char		cw = PG_GETARG_CHAR(1);
	TSVector	out;
	int			i,
				j;
	WordEntry  *entry;
	WordEntryPos *p;
	int			w = 0;

	switch (cw)
	{
		case 'A':
		case 'a':
			w = 3;
			break;
		case 'B':
		case 'b':
			w = 2;
			break;
		case 'C':
		case 'c':
			w = 1;
			break;
		case 'D':
		case 'd':
			w = 0;
			break;
		default:
			/* internal error */
			elog(ERROR, "unrecognized weight: %d", cw);
	}

	/* work on a byte-for-byte copy so the input datum is left unmodified */
	out = (TSVector) palloc(VARSIZE(in));
	memcpy(out, in, VARSIZE(in));
	entry = ARRPTR(out);
	i = out->size;
	while (i--)
	{
		if ((j = POSDATALEN(out, entry)) != 0)
		{
			p = POSDATAPTR(out, entry);
			while (j--)
			{
				WEP_SETWEIGHT(*p, w);
				p++;
			}
		}
		entry++;
	}

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}

/*
 * Compare two lexemes: entry a within string area ptra against entry b
 * within string area ptrb.
 *
 * Shorter lexemes sort before longer ones; lexemes of equal length are
 * ordered by strncmp() over that length.  Returns <0, 0 or >0.
 */
static int
compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
{
	if (a->len == b->len)
	{
		return strncmp(
					   ptra + a->pos,
					   ptrb + b->pos,
					   a->len);
	}
	return (a->len > b->len) ? 1 : -1;
}

/*
 * Add positions from src to dest after offsetting them by maxpos.
 * Return the number added (might be less than expected due to overflow)
 *
 * src/srcptr:	 source vector and the entry whose positions are read.
 * dest/destptr: destination vector and the entry whose position list is
 *				 appended to.  If destptr->haspos is clear, the destination
 *				 count is reset to zero before appending.
 * maxpos:		 offset added to each source position (the sum is passed
 *				 through LIMITPOS).
 *
 * Appending stops early once the destination holds MAXNUMPOS positions or
 * its last stored position equals MAXENTRYPOS - 1.  destptr->haspos is set
 * if at least one position was added.
 */
static int4
add_pos(TSVector src, WordEntry *srcptr,
		TSVector dest, WordEntry *destptr,
		int4 maxpos)
{
	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
	int			i;
	uint16		slen = POSDATALEN(src, srcptr),
				startlen;
	WordEntryPos *spos = POSDATAPTR(src, srcptr),
			   *dpos = POSDATAPTR(dest, destptr);

	if (!destptr->haspos)
		*clen = 0;

	startlen = *clen;
	for (i = 0;
		 i < slen && *clen < MAXNUMPOS &&
		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
		 i++)
	{
		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
		(*clen)++;
	}

	if (*clen != startlen)
		destptr->haspos = 1;
	return *clen - startlen;
}


/*
 * tsvector_concat
 *
 * Arguments 0 and 1: the two tsvectors to combine.
 *
 * Only the first part of this function is visible in this chunk; the
 * comments below describe that visible part only.
 */
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
	TSVector	in1 = PG_GETARG_TSVECTOR(0);
	TSVector	in2 = PG_GETARG_TSVECTOR(1);
	TSVector	out;
	WordEntry  *ptr;
	WordEntry  *ptr1,
			   *ptr2;
	WordEntryPos *p;
	int			maxpos = 0,
				i,
				j,
				i1,
				i2,
				dataoff;
	char	   *data,
			   *data1,
			   *data2;

	/* find the largest position used in in1; in2's positions get shifted by it */
	ptr = ARRPTR(in1);
	i = in1->size;
	while (i--)
	{
		if ((j = POSDATALEN(in1, ptr)) != 0)
		{
			p = POSDATAPTR(in1, ptr);
			while (j--)
			{
				if (WEP_GETPOS(*p) > maxpos)
					maxpos = WEP_GETPOS(*p);
				p++;
			}
		}
		ptr++;
	}

	ptr1 = ARRPTR(in1);
	ptr2 = ARRPTR(in2);
	data1 = STRPTR(in1);
	data2 = STRPTR(in2);
	i1 = in1->size;
	i2 = in2->size;
	/* conservative estimate of space needed */
	/*
	 * NOTE(review): the SHORTALIGN() calls below can insert a pad byte in
	 * front of position data where the corresponding input had none --
	 * confirm that VARSIZE(in1) + VARSIZE(in2) is always large enough.
	 */
	out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
	SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
	out->size = in1->size + in2->size;
	ptr = ARRPTR(out);
	data = STRPTR(out);
	dataoff = 0;

	/*
	 * Merge-style walk over both entry arrays, driven by compareEntry().
	 * NOTE(review): this presumes both inputs are already ordered according
	 * to compareEntry() -- confirm against the tsvector construction code.
	 */
	while (i1 && i2)
	{
		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

		if (cmp < 0)
		{						/* in1 first */
			ptr->haspos = ptr1->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				/* in1 positions are copied verbatim: count word plus entries */
				dataoff = SHORTALIGN(dataoff);
				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
			}

			ptr++;
			ptr1++;
			i1--;
		}
		else if (cmp > 0)
		{						/* in2 first */
			ptr->haspos = ptr2->haspos;
			ptr->len = ptr2->len;
			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
			ptr->pos = dataoff;
			dataoff += ptr2->len;
			if (ptr->haspos)
			{
				/* in2 positions are re-based by maxpos via add_pos() */
				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

				if (addlen == 0)
					ptr->haspos = 0;
				else
				{
					dataoff = SHORTALIGN(dataoff);
					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
				}
			}

			ptr++;
			ptr2++;
			i2--;
		}
		else
		{
			/* same lexeme in both inputs: emit it once, combining positions */
			ptr->haspos = ptr1->haspos | ptr2->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				if (ptr1->haspos)
				{
					dataoff = SHORTALIGN(dataoff);
					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
					/* count word already accounted for; add only the new entries */
					if (ptr2->haspos)
						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
				}
				else	/* must have ptr2->haspos */
				{
					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

					if (addlen == 0)
						ptr->haspos = 0;
					else
					{
						dataoff = SHORTALIGN(dataoff);
						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
					}
				}
			}

			ptr++;
			ptr1++;
			ptr2++;
			i1--;
			i2--;
		}
	}

	/* in2 exhausted: copy the remaining in1 entries */
	while (i1)
	{
		ptr->haspos = ptr1->haspos;
		ptr->len = ptr1->len;
		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
		ptr->pos = dataoff;
		dataoff += ptr1->len;
		if (ptr->haspos)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?