tsrank.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 857 行 · 第 1/2 页

C
857
字号
	PG_FREE_IF_COPY(win, 0);	PG_FREE_IF_COPY(txt, 1);	PG_FREE_IF_COPY(query, 2);	PG_RETURN_FLOAT4(res);}Datumts_rank_ttf(PG_FUNCTION_ARGS){	TSVector	txt = PG_GETARG_TSVECTOR(0);	TSQuery		query = PG_GETARG_TSQUERY(1);	int			method = PG_GETARG_INT32(2);	float		res;	res = calc_rank(getWeights(NULL), txt, query, method);	PG_FREE_IF_COPY(txt, 0);	PG_FREE_IF_COPY(query, 1);	PG_RETURN_FLOAT4(res);}Datumts_rank_tt(PG_FUNCTION_ARGS){	TSVector	txt = PG_GETARG_TSVECTOR(0);	TSQuery		query = PG_GETARG_TSQUERY(1);	float		res;	res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);	PG_FREE_IF_COPY(txt, 0);	PG_FREE_IF_COPY(query, 1);	PG_RETURN_FLOAT4(res);}typedef struct{	QueryItem **item;	int16		nitem;	uint8		wclass;	int32		pos;} DocRepresentation;static intcompareDocR(const void *va, const void *vb){	DocRepresentation *a = (DocRepresentation *) va;	DocRepresentation *b = (DocRepresentation *) vb;	if (a->pos == b->pos)		return 0;	return (a->pos > b->pos) ? 1 : -1;}typedef struct{	TSQuery		query;	bool	   *operandexist;} QueryRepresentation;#define QR_GET_OPERAND_EXISTS(q, v)		( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )#define QR_SET_OPERAND_EXISTS(q, v)  QR_GET_OPERAND_EXISTS(q,v) = truestatic boolcheckcondition_QueryOperand(void *checkval, QueryOperand *val){	QueryRepresentation *qr = (QueryRepresentation *) checkval;	return QR_GET_OPERAND_EXISTS(qr, val);}typedef struct{	int			pos;	int			p;	int			q;	DocRepresentation *begin;	DocRepresentation *end;} Extention;static boolCover(DocRepresentation *doc, int len, QueryRepresentation *qr, Extention *ext){	DocRepresentation *ptr;	int			lastpos = ext->pos;	int			i;	bool		found = false;	/*	 * since this function recurses, it could be driven to stack overflow.	 * (though any decent compiler will optimize away the tail-recursion.	 */	check_stack_depth();	memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);	ext->p = 0x7fffffff;	ext->q = 0;	ptr = doc + ext->pos;	/* find upper bound of cover from current position, move up */	while (ptr - doc < len)	{		for (i = 0; i < ptr->nitem; i++)		{			if (ptr->item[i]->type == QI_VAL)				QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);		}		if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))		{			if (ptr->pos > ext->q)			{				ext->q = ptr->pos;				ext->end = ptr;				lastpos = ptr - doc;				found = true;			}			break;		}		ptr++;	}	if (!found)		return false;	memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);	ptr = doc + lastpos;	/* find lower bound of cover from found upper bound, move down */	while (ptr >= doc + ext->pos)	{		for (i = 0; i < ptr->nitem; i++)			if (ptr->item[i]->type == QI_VAL)				QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);		if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))		{			if (ptr->pos < ext->p)			{				ext->begin = ptr;				ext->p = ptr->pos;			}			break;		}		ptr--;	}	if (ext->p <= ext->q)	{		/*		 * set position for next try to next lexeme after begining of founded		 * cover		 */		ext->pos = (ptr - doc) + 1;		return true;	}	ext->pos++;	return Cover(doc, len, qr, ext);}static DocRepresentation *get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen){	QueryItem  *item = GETQUERY(qr->query);	WordEntry  *entry;	WordEntryPos *post;	int4		dimt,				j,				i;	int			len = qr->query->size * 4,				cur = 0;	DocRepresentation *doc;	char	   *operand;	doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);	operand = GETOPERAND(qr->query);	for (i = 0; i < qr->query->size; i++)	{		QueryOperand *curoperand;		if (item[i].type != QI_VAL)			continue;		curoperand = &item[i].operand;		if (QR_GET_OPERAND_EXISTS(qr, &item[i]))			continue;		entry = find_wordentry(txt, qr->query, curoperand);		if (!entry)			continue;		if (entry->haspos)		{			dimt = POSDATALEN(txt, entry);			post = POSDATAPTR(txt, entry);		}		else		{			dimt = POSNULL.npos;			post = POSNULL.pos;		}		while (cur + dimt >= len)		{			len *= 2;			doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);		}		for (j = 0; j < dimt; j++)		{			if (j == 0)			{				int			k;				doc[cur].nitem = 0;				doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);				for (k = 0; k < qr->query->size; k++)				{					QueryOperand *kptr = &item[k].operand;					QueryOperand *iptr = &item[i].operand;					if (k == i ||						(item[k].type == QI_VAL &&						 compareQueryOperand(&kptr, &iptr, operand) == 0))					{						/*						 * if k == i, we've already checked above that it's						 * type == Q_VAL						 */						doc[cur].item[doc[cur].nitem] = item + k;						doc[cur].nitem++;						QR_SET_OPERAND_EXISTS(qr, item + k);					}				}			}			else			{				doc[cur].nitem = doc[cur - 1].nitem;				doc[cur].item = doc[cur - 1].item;			}			doc[cur].pos = WEP_GETPOS(post[j]);			doc[cur].wclass = WEP_GETWEIGHT(post[j]);			cur++;		}	}	*doclen = cur;	if (cur > 0)	{		qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);		return doc;	}	pfree(doc);	return NULL;}static float4calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method){	DocRepresentation *doc;	int			len,				i,				doclen = 0;	Extention	ext;	double		Wdoc = 0.0;	double		invws[lengthof(weights)];	double		SumDist = 0.0,				PrevExtPos = 0.0,				CurExtPos = 0.0;	int			NExtent = 0;	QueryRepresentation qr;	for (i = 0; i < lengthof(weights); i++)	{		invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));		if (invws[i] > 1.0)			ereport(ERROR,					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),					 errmsg("weight out of range")));		invws[i] = 1.0 / invws[i];	}	qr.query = query;	qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);	doc = get_docrep(txt, &qr, &doclen);	if (!doc)	{		pfree(qr.operandexist);		return 0.0;	}	MemSet(&ext, 0, sizeof(Extention));	while (Cover(doc, doclen, &qr, &ext))	{		double		Cpos = 0.0;		double		InvSum = 0.0;		int			nNoise;		DocRepresentation *ptr = ext.begin;		while (ptr <= ext.end)		{			InvSum += invws[ptr->wclass];			ptr++;		}		Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;		/*		 * if doc are big enough then ext.q may be equal to ext.p due to limit		 * of posional information. In this case we approximate number of		 * noise word as half cover's length		 */		nNoise = (ext.q - ext.p) - (ext.end - ext.begin);		if (nNoise < 0)			nNoise = (ext.end - ext.begin) / 2;		Wdoc += Cpos / ((double) (1 + nNoise));		CurExtPos = ((double) (ext.q + ext.p)) / 2.0;		if (NExtent > 0 && CurExtPos > PrevExtPos		/* prevent devision by														 * zero in a case of				multiple lexize */ )			SumDist += 1.0 / (CurExtPos - PrevExtPos);		PrevExtPos = CurExtPos;		NExtent++;	}	if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)		Wdoc /= log((double) (cnt_length(txt) + 1));	if (method & RANK_NORM_LENGTH)	{		len = cnt_length(txt);		if (len > 0)			Wdoc /= (double) len;	}	if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)		Wdoc /= ((double) NExtent) / SumDist;	if ((method & RANK_NORM_UNIQ) && txt->size > 0)		Wdoc /= (double) (txt->size);	if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)		Wdoc /= log((double) (txt->size + 1)) / log(2.0);	if (method & RANK_NORM_RDIVRPLUS1)		Wdoc /= (Wdoc + 1);	pfree(doc);	pfree(qr.operandexist);	return (float4) Wdoc;}Datumts_rankcd_wttf(PG_FUNCTION_ARGS){	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));	TSVector	txt = PG_GETARG_TSVECTOR(1);	TSQuery		query = PG_GETARG_TSQUERY(2);	int			method = PG_GETARG_INT32(3);	float		res;	res = calc_rank_cd(getWeights(win), txt, query, method);	PG_FREE_IF_COPY(win, 0);	PG_FREE_IF_COPY(txt, 1);	PG_FREE_IF_COPY(query, 2);	PG_RETURN_FLOAT4(res);}Datumts_rankcd_wtt(PG_FUNCTION_ARGS){	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));	TSVector	txt = PG_GETARG_TSVECTOR(1);	TSQuery		query = PG_GETARG_TSQUERY(2);	float		res;	res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);	PG_FREE_IF_COPY(win, 0);	PG_FREE_IF_COPY(txt, 1);	PG_FREE_IF_COPY(query, 2);	PG_RETURN_FLOAT4(res);}Datumts_rankcd_ttf(PG_FUNCTION_ARGS){	TSVector	txt = PG_GETARG_TSVECTOR(0);	TSQuery		query = PG_GETARG_TSQUERY(1);	int			method = PG_GETARG_INT32(2);	float		res;	res = calc_rank_cd(getWeights(NULL), txt, query, method);	PG_FREE_IF_COPY(txt, 0);	PG_FREE_IF_COPY(query, 1);	PG_RETURN_FLOAT4(res);}Datumts_rankcd_tt(PG_FUNCTION_ARGS){	TSVector	txt = PG_GETARG_TSVECTOR(0);	TSQuery		query = PG_GETARG_TSQUERY(1);	float		res;	res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);	PG_FREE_IF_COPY(txt, 0);	PG_FREE_IF_COPY(query, 1);	PG_RETURN_FLOAT4(res);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?