tsvector_op.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 1,439 行 · 第 1/3 页
C
1,439 行
/*-------------------------------------------------------------------------
 *
 * tsvector_op.c
 *    operations over tsvector
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.12.2.1 2008/04/08 18:20:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "catalog/namespace.h"
#include "commands/trigger.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"


typedef struct
{
    WordEntry  *arrb;
    WordEntry  *arre;
    char       *values;
    char       *operand;
} CHKVAL;

typedef struct
{
    uint32      cur;
    TSVector    stat;
} StatStorage;

typedef struct
{
    uint32      len;
    uint32      pos;
    uint32      ndoc;
    uint32      nentry;
} StatEntry;

typedef struct
{
    int32       vl_len_;        /* varlena header (do not touch directly!) */
    int4        size;
    int4        weight;
    char        data[1];
} tsstat;

#define STATHDRSIZE (sizeof(int4) * 4)
#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
#define STATPTR(x)  ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
#define STATSTRPTR(x)   ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((TSVector)(x))->size ) )
#define STATSTRSIZE(x)  ( VARSIZE((TSVector)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((TSVector)(x))->size ) )


static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);


/*
 * Check if datatype is the specified type or equivalent to it.
 *
 * Note: we could just do getBaseType() unconditionally, but since that's
 * a relatively expensive catalog lookup that most users won't need, we
 * try the straight comparison first.
 *
 * Returns true when typid equals expected_type either directly or after
 * being passed through getBaseType(); false otherwise.
 */
static bool
is_expected_type(Oid typid, Oid expected_type)
{
    if (typid == expected_type)
        return true;
    typid = getBaseType(typid);
    if (typid == expected_type)
        return true;
    return false;
}

/* Check if datatype is TEXT or binary-equivalent to it */
static bool
is_text_type(Oid typid)
{
    /* varchar(n) and char(n) are binary-compatible with text */
    if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
        return true;
    /* Allow domains over these types, too */
    typid = getBaseType(typid);
    if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
        return true;
    return false;
}


/*
 * Order: haspos, len, word, for all positions (pos, weight)
 *
 * Before looking at individual entries, the two vectors are ordered by
 * their total VARSIZE and then by their entry count.  Only vectors that
 * agree on both are compared entry by entry.
 *
 * Note that the per-entry haspos/len/position/weight comparisons are
 * inverted: the side holding the larger value sorts first (-1 is
 * returned).  The lexeme bytes themselves are compared with strncmp() and
 * its result is returned unchanged.
 *
 * Returns a negative value, zero, or a positive value.
 */
static int
silly_cmp_tsvector(const TSVector a, const TSVector b)
{
    if (VARSIZE(a) < VARSIZE(b))
        return -1;
    else if (VARSIZE(a) > VARSIZE(b))
        return 1;
    else if (a->size < b->size)
        return -1;
    else if (a->size > b->size)
        return 1;
    else
    {
        WordEntry  *aptr = ARRPTR(a);
        WordEntry  *bptr = ARRPTR(b);
        int         i = 0;
        int         res;

        /* walk both entry arrays in lockstep; they have the same length */
        for (i = 0; i < a->size; i++)
        {
            if (aptr->haspos != bptr->haspos)
            {
                return (aptr->haspos > bptr->haspos) ? -1 : 1;
            }
            else if (aptr->len != bptr->len)
            {
                return (aptr->len > bptr->len) ? -1 : 1;
            }
            else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
            {
                return res;
            }
            else if (aptr->haspos)
            {
                /* same lexeme, both have positions: compare position lists */
                WordEntryPos *ap = POSDATAPTR(a, aptr);
                WordEntryPos *bp = POSDATAPTR(b, bptr);
                int         j;

                if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
                    return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;

                for (j = 0; j < POSDATALEN(a, aptr); j++)
                {
                    if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
                    {
                        return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
                    }
                    else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
                    {
                        return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
                    }
                    ap++, bp++;
                }
            }

            aptr++;
            bptr++;
        }
    }

    return 0;
}

/*
 * Generate one SQL-callable comparison function named tsvector_<type>.
 *
 * The generated function fetches both tsvector arguments, calls
 * silly_cmp_tsvector() on them, frees any detoasted copies, and returns
 * "res action 0" through PG_RETURN_<ret>.  For the "cmp" variant the
 * action is "+", so the raw comparison result is returned as an int32.
 */
#define TSVECTORCMPFUNC( type, action, ret )            \
Datum                                                   \
tsvector_##type(PG_FUNCTION_ARGS)                       \
{                                                       \
    TSVector    a = PG_GETARG_TSVECTOR(0);              \
    TSVector    b = PG_GETARG_TSVECTOR(1);              \
    int         res = silly_cmp_tsvector(a, b);         \
    PG_FREE_IF_COPY(a,0);                               \
    PG_FREE_IF_COPY(b,1);                               \
    PG_RETURN_##ret( res action 0 );                    \
}

TSVECTORCMPFUNC(lt, <, BOOL);
TSVECTORCMPFUNC(le, <=, BOOL);
TSVECTORCMPFUNC(eq, ==, BOOL);
TSVECTORCMPFUNC(ge, >=, BOOL);
TSVECTORCMPFUNC(gt, >, BOOL);
TSVECTORCMPFUNC(ne, !=, BOOL);
TSVECTORCMPFUNC(cmp, +, INT32);

/*
 * tsvector_strip
 *
 * Build a new tsvector holding the same lexemes as the argument, in the
 * same order, but with haspos cleared on every entry so that no position
 * data is carried over.  Only the lexeme bytes are copied.
 */
Datum
tsvector_strip(PG_FUNCTION_ARGS)
{
    TSVector    in = PG_GETARG_TSVECTOR(0);
    TSVector    out;
    int         i,
                len = 0;
    WordEntry  *arrin = ARRPTR(in),
               *arrout;
    char       *cur;

    /* total lexeme bytes; position data is deliberately not counted */
    for (i = 0; i < in->size; i++)
        len += arrin[i].len;

    len = CALCDATASIZE(in->size, len);
    out = (TSVector) palloc0(len);
    SET_VARSIZE(out, len);
    out->size = in->size;
    arrout = ARRPTR(out);
    cur = STRPTR(out);
    for (i = 0; i < in->size; i++)
    {
        /* pack the lexemes back to back in the output string area */
        memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
        arrout[i].haspos = 0;
        arrout[i].len = arrin[i].len;
        arrout[i].pos = cur - STRPTR(out);
        cur += arrout[i].len;
    }

    PG_FREE_IF_COPY(in, 0);
    PG_RETURN_POINTER(out);
}

/*
 * tsvector_length
 *
 * Return the number of entries (the size field) of the tsvector argument.
 */
Datum
tsvector_length(PG_FUNCTION_ARGS)
{
    TSVector    in = PG_GETARG_TSVECTOR(0);
    int4        ret = in->size;

    PG_FREE_IF_COPY(in, 0);
    PG_RETURN_INT32(ret);
}

/*
 * tsvector_setweight
 *
 * Return a copy of the tsvector argument in which every stored position
 * carries the weight named by the second (char) argument.  The letters
 * 'A'..'D' are accepted in either case and map to 3..0; any other
 * character raises an error.
 */
Datum
tsvector_setweight(PG_FUNCTION_ARGS)
{
    TSVector    in = PG_GETARG_TSVECTOR(0);
    char        cw = PG_GETARG_CHAR(1);
    TSVector    out;
    int         i,
                j;
    WordEntry  *entry;
    WordEntryPos *p;
    int         w = 0;

    switch (cw)
    {
        case 'A':
        case 'a':
            w = 3;
            break;
        case 'B':
        case 'b':
            w = 2;
            break;
        case 'C':
        case 'c':
            w = 1;
            break;
        case 'D':
        case 'd':
            w = 0;
            break;
        default:
            /* internal error */
            elog(ERROR, "unrecognized weight: %d", cw);
    }

    /* work on a byte-for-byte copy so the input is left unmodified */
    out = (TSVector) palloc(VARSIZE(in));
    memcpy(out, in, VARSIZE(in));
    entry = ARRPTR(out);
    i = out->size;
    while (i--)
    {
        if ((j = POSDATALEN(out, entry)) != 0)
        {
            p = POSDATAPTR(out, entry);
            while (j--)
            {
                WEP_SETWEIGHT(*p, w);
                p++;
            }
        }
        entry++;
    }

    PG_FREE_IF_COPY(in, 0);
    PG_RETURN_POINTER(out);
}

/*
 * Compare two entries whose lexeme text lives in the string areas ptra and
 * ptrb respectively.
 *
 * Entries are ordered by length first (shorter sorts first); entries of
 * equal length are ordered by strncmp() over that length.
 */
static int
compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
{
    if (a->len == b->len)
    {
        return strncmp(
                       ptra + a->pos,
                       ptrb + b->pos,
                       a->len);
    }
    return (a->len > b->len) ? 1 : -1;
}

/*
 * Add positions from src to dest after offsetting them by maxpos.
 * Return the number added (might be less than expected due to overflow)
 *
 * If destptr has no positions yet, its position count is reset to zero
 * first.  Copying stops early once dest holds MAXNUMPOS positions or once
 * the last stored position equals MAXENTRYPOS - 1.  destptr->haspos is
 * set to 1 only if at least one position was actually added.
 */
static int4
add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int4 maxpos)
{
    uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
    int         i;
    uint16      slen = POSDATALEN(src, srcptr),
                startlen;
    WordEntryPos *spos = POSDATAPTR(src, srcptr),
               *dpos = POSDATAPTR(dest, destptr);

    if (!destptr->haspos)
        *clen = 0;

    startlen = *clen;
    for (i = 0;
         i < slen && *clen < MAXNUMPOS &&
         (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
         i++)
    {
        WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
        WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
        (*clen)++;
    }

    if (*clen != startlen)
        destptr->haspos = 1;
    return *clen - startlen;
}

/*
 * tsvector_concat
 *
 * Merge the entries of two tsvectors into one output vector, using
 * compareEntry() to decide the merge order.  Positions taken from the
 * second input are offset by the largest position found in the first
 * input (maxpos) via add_pos().
 */
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
    TSVector    in1 = PG_GETARG_TSVECTOR(0);
    TSVector    in2 = PG_GETARG_TSVECTOR(1);
    TSVector    out;
    WordEntry  *ptr;
    WordEntry  *ptr1,
               *ptr2;
    WordEntryPos *p;
    int         maxpos = 0,
                i,
                j,
                i1,
                i2,
                dataoff;
    char       *data,
               *data1,
               *data2;

    /* find the largest position used anywhere in in1 */
    ptr = ARRPTR(in1);
    i = in1->size;
    while (i--)
    {
        if ((j = POSDATALEN(in1, ptr)) != 0)
        {
            p = POSDATAPTR(in1, ptr);
            while (j--)
            {
                if (WEP_GETPOS(*p) > maxpos)
                    maxpos = WEP_GETPOS(*p);
                p++;
            }
        }
        ptr++;
    }

    ptr1 = ARRPTR(in1);
    ptr2 = ARRPTR(in2);
    data1 = STRPTR(in1);
    data2 = STRPTR(in2);
    i1 = in1->size;
    i2 = in2->size;
    /* conservative estimate of space needed */
    out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
    SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
    out->size = in1->size + in2->size;
    ptr = ARRPTR(out);
    data = STRPTR(out);
    dataoff = 0;
    while (i1 && i2)
    {
        int         cmp = compareEntry(data1, ptr1, data2, ptr2);

        if (cmp < 0)
        {                       /* in1 first */
            ptr->haspos = ptr1->haspos;
            ptr->len = ptr1->len;
            memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
            ptr->pos = dataoff;
            dataoff += ptr1->len;
            if (ptr->haspos)
            {
                /* copy in1's position vector (count + positions) verbatim */
                dataoff = SHORTALIGN(dataoff);
                memcpy(data + dataoff, _POSVECPTR(in1, ptr1),
                       POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
                dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
            }

            ptr++;
            ptr1++;
            i1--;
        }
        else if (cmp > 0)
        {                       /* in2 first */
            ptr->haspos = ptr2->haspos;
            ptr->len = ptr2->len;
            memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
            ptr->pos = dataoff;
            dataoff += ptr2->len;
            if (ptr->haspos)
            {
                int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);

                /* nothing could be added: entry ends up without positions */
                if (addlen == 0)
                    ptr->haspos = 0;
                else
                {
                    dataoff = SHORTALIGN(dataoff);
                    dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
                }
            }

            ptr++;
            ptr2++;
            i2--;
        }
        else
        {
            /* same lexeme in both inputs: emit it once, merging positions */
            ptr->haspos = ptr1->haspos | ptr2->haspos;
            ptr->len = ptr1->len;
            memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
            ptr->pos = dataoff;
            dataoff += ptr1->len;
            if (ptr->haspos)
            {
                if (ptr1->haspos)
                {
                    dataoff = SHORTALIGN(dataoff);
                    memcpy(data + dataoff, _POSVECPTR(in1, ptr1),
                           POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
                    dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
                    /* in2's positions are appended after the copied ones */
                    if (ptr2->haspos)
                        dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
                }
                else    /* must have ptr2->haspos */
                {
                    int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);

                    if (addlen == 0)
                        ptr->haspos = 0;
                    else
                    {
                        dataoff = SHORTALIGN(dataoff);
                        dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
                    }
                }
            }

            ptr++;
            ptr1++;
            ptr2++;
            i1--;
            i2--;
        }
    }

    while (i1)
    {
        ptr->haspos = ptr1->haspos;
        ptr->len = ptr1->len;
        memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
        ptr->pos = dataoff;
        dataoff += ptr1->len;
        if (ptr->haspos)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?