selfuncs.c

来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 2,384 行 · 第 1/5 页

C
2,384
字号
		float4	   *numbers2 = NULL;		int			nnumbers2 = 0;		if (var1 != NULL)		{			/* get stats for the attribute, if available */			Oid			relid1 = getrelid(var1->varno, root->rtable);			if (relid1 != InvalidOid)			{				statsTuple1 = SearchSysCache(STATRELATT,											 ObjectIdGetDatum(relid1),										   Int16GetDatum(var1->varattno),											 0, 0);				if (HeapTupleIsValid(statsTuple1))				{					stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);					have_mcvs1 = get_attstatsslot(statsTuple1,												  var1->vartype,												  var1->vartypmod,												  STATISTIC_KIND_MCV,												  InvalidOid,												  &values1, &nvalues1,												  &numbers1, &nnumbers1);				}				nd1 = get_att_numdistinct(root, var1, stats1);			}		}		if (var2 != NULL)		{			/* get stats for the attribute, if available */			Oid			relid2 = getrelid(var2->varno, root->rtable);			if (relid2 != InvalidOid)			{				statsTuple2 = SearchSysCache(STATRELATT,											 ObjectIdGetDatum(relid2),										   Int16GetDatum(var2->varattno),											 0, 0);				if (HeapTupleIsValid(statsTuple2))				{					stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);					have_mcvs2 = get_attstatsslot(statsTuple2,												  var2->vartype,												  var2->vartypmod,												  STATISTIC_KIND_MCV,												  InvalidOid,												  &values2, &nvalues2,												  &numbers2, &nnumbers2);				}				nd2 = get_att_numdistinct(root, var2, stats2);			}		}		if (have_mcvs1 && have_mcvs2)		{			/*			 * We have most-common-value lists for both relations.	Run			 * through the lists to see which MCVs actually join to each			 * other with the given operator.  This allows us to determine			 * the exact join selectivity for the portion of the relations			 * represented by the MCV lists.  We still have to estimate			 * for the remaining population, but in a skewed distribution			 * this gives us a big leg up in accuracy.	For motivation see			 * the analysis in Y. Ioannidis and S. Christodoulakis, "On			 * the propagation of errors in the size of join results",			 * Technical Report 1018, Computer Science Dept., University			 * of Wisconsin, Madison, March 1991 (available from			 * ftp.cs.wisc.edu).			 */			FmgrInfo	eqproc;			bool	   *hasmatch1;			bool	   *hasmatch2;			double		nullfrac1 = stats1->stanullfrac;			double		nullfrac2 = stats2->stanullfrac;			double		matchprodfreq,						matchfreq1,						matchfreq2,						unmatchfreq1,						unmatchfreq2,						otherfreq1,						otherfreq2,						totalsel1,						totalsel2;			int			i,						nmatches;			fmgr_info(get_opcode(operator), &eqproc);			hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));			hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));			/*			 * If we are doing any variant of JOIN_IN, pretend all the			 * values of the righthand relation are unique (ie, act as if			 * it's been DISTINCT'd).			 *			 * NOTE: it might seem that we should unique-ify the lefthand			 * input when considering JOIN_REVERSE_IN.	But this is not			 * so, because the join clause we've been handed has not been			 * commuted from the way the parser originally wrote it.  We			 * know that the unique side of the IN clause is *always* on			 * the right.			 *			 * NOTE: it would be dangerous to try to be smart about JOIN_LEFT			 * or JOIN_RIGHT here, because we do not have enough			 * information to determine which var is really on which side			 * of the join. Perhaps someday we should pass in more			 * information.			 */			if (jointype == JOIN_IN ||				jointype == JOIN_REVERSE_IN ||				jointype == JOIN_UNIQUE_INNER ||				jointype == JOIN_UNIQUE_OUTER)			{				float4		oneovern = 1.0 / nd2;				for (i = 0; i < nvalues2; i++)					numbers2[i] = oneovern;				nullfrac2 = oneovern;			}			/*			 * Note we assume that each MCV will match at most one member			 * of the other MCV list.  If the operator isn't really			 * equality, there could be multiple matches --- but we don't			 * look for them, both for speed and because the math wouldn't			 * add up...			 */			matchprodfreq = 0.0;			nmatches = 0;			for (i = 0; i < nvalues1; i++)			{				int			j;				for (j = 0; j < nvalues2; j++)				{					if (hasmatch2[j])						continue;					if (DatumGetBool(FunctionCall2(&eqproc,												   values1[i],												   values2[j])))					{						hasmatch1[i] = hasmatch2[j] = true;						matchprodfreq += numbers1[i] * numbers2[j];						nmatches++;						break;					}				}			}			CLAMP_PROBABILITY(matchprodfreq);			/* Sum up frequencies of matched and unmatched MCVs */			matchfreq1 = unmatchfreq1 = 0.0;			for (i = 0; i < nvalues1; i++)			{				if (hasmatch1[i])					matchfreq1 += numbers1[i];				else					unmatchfreq1 += numbers1[i];			}			CLAMP_PROBABILITY(matchfreq1);			CLAMP_PROBABILITY(unmatchfreq1);			matchfreq2 = unmatchfreq2 = 0.0;			for (i = 0; i < nvalues2; i++)			{				if (hasmatch2[i])					matchfreq2 += numbers2[i];				else					unmatchfreq2 += numbers2[i];			}			CLAMP_PROBABILITY(matchfreq2);			CLAMP_PROBABILITY(unmatchfreq2);			pfree(hasmatch1);			pfree(hasmatch2);			/*			 * Compute total frequency of non-null values that are not in			 * the MCV lists.			 */			otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;			otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;			CLAMP_PROBABILITY(otherfreq1);			CLAMP_PROBABILITY(otherfreq2);			/*			 * We can estimate the total selectivity from the point of			 * view of relation 1 as: the known selectivity for matched			 * MCVs, plus unmatched MCVs that are assumed to match against			 * random members of relation 2's non-MCV population, plus			 * non-MCV values that are assumed to match against random			 * members of relation 2's unmatched MCVs plus non-MCV values.			 */			totalsel1 = matchprodfreq;			if (nd2 > nvalues2)				totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);			if (nd2 > nmatches)				totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /					(nd2 - nmatches);			/* Same estimate from the point of view of relation 2. */			totalsel2 = matchprodfreq;			if (nd1 > nvalues1)				totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);			if (nd1 > nmatches)				totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /					(nd1 - nmatches);			/*			 * Use the smaller of the two estimates.  This can be			 * justified in essentially the same terms as given below for			 * the no-stats case: to a first approximation, we are			 * estimating from the point of view of the relation with			 * smaller nd.			 */			selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;		}		else		{			/*			 * We do not have MCV lists for both sides.  Estimate the join			 * selectivity as			 * MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This is			 * plausible if we assume that the join operator is strict and			 * the non-null values are about equally distributed: a given			 * non-null tuple of rel1 will join to either zero or			 * N2*(1-nullfrac2)/nd2 rows of rel2, so total join rows are			 * at most N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join			 * selectivity of not more than			 * (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it is			 * not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the			 * expression with MIN() is an upper bound.  Using the MIN()			 * means we estimate from the point of view of the relation			 * with smaller nd (since the larger nd is determining the			 * MIN).  It is reasonable to assume that most tuples in this			 * rel will have join partners, so the bound is probably			 * reasonably tight and should be taken as-is.			 *			 * XXX Can we be smarter if we have an MCV list for just one			 * side? It seems that if we assume equal distribution for the			 * other side, we end up with the same answer anyway.			 */			double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;			double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;			selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);			if (nd1 > nd2)				selec /= nd1;			else				selec /= nd2;		}		if (have_mcvs1)			free_attstatsslot(var1->vartype, values1, nvalues1,							  numbers1, nnumbers1);		if (have_mcvs2)			free_attstatsslot(var2->vartype, values2, nvalues2,							  numbers2, nnumbers2);		if (HeapTupleIsValid(statsTuple1))			ReleaseSysCache(statsTuple1);		if (HeapTupleIsValid(statsTuple2))			ReleaseSysCache(statsTuple2);	}	CLAMP_PROBABILITY(selec);	PG_RETURN_FLOAT8((float8) selec);}/* *		neqjoinsel		- Join selectivity of "!=" */Datumneqjoinsel(PG_FUNCTION_ARGS){	Query	   *root = (Query *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);	Oid			eqop;	float8		result;	/*	 * We want 1 - eqjoinsel() where the equality operator is the one	 * associated with this != operator, that is, its negator.	 */	eqop = get_negator(operator);	if (eqop)	{		result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,													PointerGetDatum(root),												  ObjectIdGetDatum(eqop),													PointerGetDatum(args),											   Int16GetDatum(jointype)));	}	else	{		/* Use default selectivity (should we raise an error instead?) */		result = DEFAULT_EQ_SEL;	}	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		scalarltjoinsel - Join selectivity of "<" and "<=" for scalars */Datumscalarltjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* *		scalargtjoinsel - Join selectivity of ">" and ">=" for scalars */Datumscalargtjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* *		regexeqjoinsel	- Join selectivity of regular-expression pattern match. */Datumregexeqjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		icregexeqjoinsel	- Join selectivity of case-insensitive regex match. */Datumicregexeqjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		likejoinsel			- Join selectivity of LIKE pattern match. */Datumlikejoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		iclikejoinsel			- Join selectivity of ILIKE pattern match. */Datumiclikejoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		regexnejoinsel	- Join selectivity of regex non-match. */Datumregexnejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(regexeqjoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		icregexnejoinsel	- Join selectivity of case-insensitive regex non-match. */Datumicregexnejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(icregexeqjoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		nlikejoinsel		- Join selectivity of LIKE pattern non-match. */Datumnlikejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(likejoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		icnlikejoinsel		- Join selectivity of ILIKE pattern non-match. */Datumicnlikejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(iclikejoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* * mergejoinscansel			- Scan selectivity of merge join. * * A merge join will stop as soon as it exhausts either input stream. * Therefore, if we can estimate the ranges of both input variables, * we can estimate how much of the input will actually be read.  This * can have a considerable impact on the cost when using indexscans. * * clause should be a clause already known to be mergejoinable. * * *leftscan is set to the fraction of the left-hand variable expected * to be scanned (0 to 1), and similarly *rightscan for the right-hand * variable. */voidmergejoinscansel(Query *root, Node *clause,				 Selectivity *leftscan,				 Selectivity *rightscan){	Var		   *left,			   *right;	Oid			lefttype,				righttype;	Oid			opno,				lsortop,				rsortop,				ltop,				gtop,				leop,				revgtop,				revleop;	Datum		leftmax,				rightmax;	double		selec;	/* Set default results if we can't figure anything out. */	*leftscan = *rightscan = 1.0;	/* Deconstruct the merge clause */	if (!is_opclause(clause))		return;					/* shouldn't happen */	opno = ((OpExpr *) clause)->opno;	left = (Var *) get_leftop((Expr *) clause);	right = (Var *) get_rightop((Expr *) clause);	if (!right)		return;					/* shouldn't happen */	/* Save the direct input types of the operator */	lefttype = exprType((Node *) left);	righttype = exprType((Node *) right);	/*	 * Now skip any binary-compatible relabeling; there can only be one	 * level since constant-expression folder eliminates adjacent	 * RelabelTypes.	 */	if (IsA(left, RelabelType))		left = (Var *) ((RelabelType *) left)->arg;	if (IsA(right, RelabelType))		right = (Var *) ((RelabelType *) right)->arg;	/* Can't do anything if inputs are not Vars */	if (!IsA(left, Var) ||		!IsA(right, Var))		return;	/* Verify mergejoinability and get left and right "<" operators */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?