tsrank.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 857 行 · 第 1/2 页
C
857 行
PG_FREE_IF_COPY(win, 0); PG_FREE_IF_COPY(txt, 1); PG_FREE_IF_COPY(query, 2); PG_RETURN_FLOAT4(res);}Datumts_rank_ttf(PG_FUNCTION_ARGS){ TSVector txt = PG_GETARG_TSVECTOR(0); TSQuery query = PG_GETARG_TSQUERY(1); int method = PG_GETARG_INT32(2); float res; res = calc_rank(getWeights(NULL), txt, query, method); PG_FREE_IF_COPY(txt, 0); PG_FREE_IF_COPY(query, 1); PG_RETURN_FLOAT4(res);}Datumts_rank_tt(PG_FUNCTION_ARGS){ TSVector txt = PG_GETARG_TSVECTOR(0); TSQuery query = PG_GETARG_TSQUERY(1); float res; res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD); PG_FREE_IF_COPY(txt, 0); PG_FREE_IF_COPY(query, 1); PG_RETURN_FLOAT4(res);}typedef struct{ QueryItem **item; int16 nitem; uint8 wclass; int32 pos;} DocRepresentation;static intcompareDocR(const void *va, const void *vb){ DocRepresentation *a = (DocRepresentation *) va; DocRepresentation *b = (DocRepresentation *) vb; if (a->pos == b->pos) return 0; return (a->pos > b->pos) ? 1 : -1;}typedef struct{ TSQuery query; bool *operandexist;} QueryRepresentation;#define QR_GET_OPERAND_EXISTS(q, v) ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )#define QR_SET_OPERAND_EXISTS(q, v) QR_GET_OPERAND_EXISTS(q,v) = truestatic boolcheckcondition_QueryOperand(void *checkval, QueryOperand *val){ QueryRepresentation *qr = (QueryRepresentation *) checkval; return QR_GET_OPERAND_EXISTS(qr, val);}typedef struct{ int pos; int p; int q; DocRepresentation *begin; DocRepresentation *end;} Extention;static boolCover(DocRepresentation *doc, int len, QueryRepresentation *qr, Extention *ext){ DocRepresentation *ptr; int lastpos = ext->pos; int i; bool found = false; /* * since this function recurses, it could be driven to stack overflow. * (though any decent compiler will optimize away the tail-recursion. */ check_stack_depth(); memset(qr->operandexist, 0, sizeof(bool) * qr->query->size); ext->p = 0x7fffffff; ext->q = 0; ptr = doc + ext->pos; /* find upper bound of cover from current position, move up */ while (ptr - doc < len) { for (i = 0; i < ptr->nitem; i++) { if (ptr->item[i]->type == QI_VAL) QR_SET_OPERAND_EXISTS(qr, ptr->item[i]); } if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand)) { if (ptr->pos > ext->q) { ext->q = ptr->pos; ext->end = ptr; lastpos = ptr - doc; found = true; } break; } ptr++; } if (!found) return false; memset(qr->operandexist, 0, sizeof(bool) * qr->query->size); ptr = doc + lastpos; /* find lower bound of cover from found upper bound, move down */ while (ptr >= doc + ext->pos) { for (i = 0; i < ptr->nitem; i++) if (ptr->item[i]->type == QI_VAL) QR_SET_OPERAND_EXISTS(qr, ptr->item[i]); if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand)) { if (ptr->pos < ext->p) { ext->begin = ptr; ext->p = ptr->pos; } break; } ptr--; } if (ext->p <= ext->q) { /* * set position for next try to next lexeme after begining of founded * cover */ ext->pos = (ptr - doc) + 1; return true; } ext->pos++; return Cover(doc, len, qr, ext);}static DocRepresentation *get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen){ QueryItem *item = GETQUERY(qr->query); WordEntry *entry; WordEntryPos *post; int4 dimt, j, i; int len = qr->query->size * 4, cur = 0; DocRepresentation *doc; char *operand; doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); operand = GETOPERAND(qr->query); for (i = 0; i < qr->query->size; i++) { QueryOperand *curoperand; if (item[i].type != QI_VAL) continue; curoperand = &item[i].operand; if (QR_GET_OPERAND_EXISTS(qr, &item[i])) continue; entry = find_wordentry(txt, qr->query, curoperand); if (!entry) continue; if (entry->haspos) { dimt = POSDATALEN(txt, entry); post = POSDATAPTR(txt, entry); } else { dimt = POSNULL.npos; post = POSNULL.pos; } while (cur + dimt >= len) { len *= 2; doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); } for (j = 0; j < dimt; j++) { if (j == 0) { int k; doc[cur].nitem = 0; doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); for (k = 0; k < qr->query->size; k++) { QueryOperand *kptr = &item[k].operand; QueryOperand *iptr = &item[i].operand; if (k == i || (item[k].type == QI_VAL && compareQueryOperand(&kptr, &iptr, operand) == 0)) { /* * if k == i, we've already checked above that it's * type == Q_VAL */ doc[cur].item[doc[cur].nitem] = item + k; doc[cur].nitem++; QR_SET_OPERAND_EXISTS(qr, item + k); } } } else { doc[cur].nitem = doc[cur - 1].nitem; doc[cur].item = doc[cur - 1].item; } doc[cur].pos = WEP_GETPOS(post[j]); doc[cur].wclass = WEP_GETWEIGHT(post[j]); cur++; } } *doclen = cur; if (cur > 0) { qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); return doc; } pfree(doc); return NULL;}static float4calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method){ DocRepresentation *doc; int len, i, doclen = 0; Extention ext; double Wdoc = 0.0; double invws[lengthof(weights)]; double SumDist = 0.0, PrevExtPos = 0.0, CurExtPos = 0.0; int NExtent = 0; QueryRepresentation qr; for (i = 0; i < lengthof(weights); i++) { invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i])); if (invws[i] > 1.0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("weight out of range"))); invws[i] = 1.0 / invws[i]; } qr.query = query; qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size); doc = get_docrep(txt, &qr, &doclen); if (!doc) { pfree(qr.operandexist); return 0.0; } MemSet(&ext, 0, sizeof(Extention)); while (Cover(doc, doclen, &qr, &ext)) { double Cpos = 0.0; double InvSum = 0.0; int nNoise; DocRepresentation *ptr = ext.begin; while (ptr <= ext.end) { InvSum += invws[ptr->wclass]; ptr++; } Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum; /* * if doc are big enough then ext.q may be equal to ext.p due to limit * of posional information. In this case we approximate number of * noise word as half cover's length */ nNoise = (ext.q - ext.p) - (ext.end - ext.begin); if (nNoise < 0) nNoise = (ext.end - ext.begin) / 2; Wdoc += Cpos / ((double) (1 + nNoise)); CurExtPos = ((double) (ext.q + ext.p)) / 2.0; if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by * zero in a case of multiple lexize */ ) SumDist += 1.0 / (CurExtPos - PrevExtPos); PrevExtPos = CurExtPos; NExtent++; } if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0) Wdoc /= log((double) (cnt_length(txt) + 1)); if (method & RANK_NORM_LENGTH) { len = cnt_length(txt); if (len > 0) Wdoc /= (double) len; } if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0) Wdoc /= ((double) NExtent) / SumDist; if ((method & RANK_NORM_UNIQ) && txt->size > 0) Wdoc /= (double) (txt->size); if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) Wdoc /= log((double) (txt->size + 1)) / log(2.0); if (method & RANK_NORM_RDIVRPLUS1) Wdoc /= (Wdoc + 1); pfree(doc); pfree(qr.operandexist); return (float4) Wdoc;}Datumts_rankcd_wttf(PG_FUNCTION_ARGS){ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); TSVector txt = PG_GETARG_TSVECTOR(1); TSQuery query = PG_GETARG_TSQUERY(2); int method = PG_GETARG_INT32(3); float res; res = calc_rank_cd(getWeights(win), txt, query, method); PG_FREE_IF_COPY(win, 0); PG_FREE_IF_COPY(txt, 1); PG_FREE_IF_COPY(query, 2); PG_RETURN_FLOAT4(res);}Datumts_rankcd_wtt(PG_FUNCTION_ARGS){ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); TSVector txt = PG_GETARG_TSVECTOR(1); TSQuery query = PG_GETARG_TSQUERY(2); float res; res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD); PG_FREE_IF_COPY(win, 0); PG_FREE_IF_COPY(txt, 1); PG_FREE_IF_COPY(query, 2); PG_RETURN_FLOAT4(res);}Datumts_rankcd_ttf(PG_FUNCTION_ARGS){ TSVector txt = PG_GETARG_TSVECTOR(0); TSQuery query = PG_GETARG_TSQUERY(1); int method = PG_GETARG_INT32(2); float res; res = calc_rank_cd(getWeights(NULL), txt, query, method); PG_FREE_IF_COPY(txt, 0); PG_FREE_IF_COPY(query, 1); PG_RETURN_FLOAT4(res);}Datumts_rankcd_tt(PG_FUNCTION_ARGS){ TSVector txt = PG_GETARG_TSVECTOR(0); TSQuery query = PG_GETARG_TSQUERY(1); float res; res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD); PG_FREE_IF_COPY(txt, 0); PG_FREE_IF_COPY(query, 1); PG_RETURN_FLOAT4(res);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?