selfuncs.c

来自「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」· C语言代码 · 共 2,328 行 · 第 1/5 页
2,328 行
		 * NOTE: it might seem that we should unique-ify the lefthand input		 * when considering JOIN_REVERSE_IN.  But this is not so, because the		 * join clause we've been handed has not been commuted from the way		 * the parser originally wrote it.	We know that the unique side of		 * the IN clause is *always* on the right.		 *		 * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or		 * JOIN_RIGHT here, because we do not have enough information to		 * determine which var is really on which side of the join. Perhaps		 * someday we should pass in more information.		 */		if (jointype == JOIN_IN ||			jointype == JOIN_REVERSE_IN ||			jointype == JOIN_UNIQUE_INNER ||			jointype == JOIN_UNIQUE_OUTER)		{			float4		oneovern = 1.0 / nd2;			for (i = 0; i < nvalues2; i++)				numbers2[i] = oneovern;			nullfrac2 = oneovern;		}		/*		 * Note we assume that each MCV will match at most one member of the		 * other MCV list.	If the operator isn't really equality, there could		 * be multiple matches --- but we don't look for them, both for speed		 * and because the math wouldn't add up...		 */		matchprodfreq = 0.0;		nmatches = 0;		for (i = 0; i < nvalues1; i++)		{			int			j;			for (j = 0; j < nvalues2; j++)			{				if (hasmatch2[j])					continue;				if (DatumGetBool(FunctionCall2(&eqproc,											   values1[i],											   values2[j])))				{					hasmatch1[i] = hasmatch2[j] = true;					matchprodfreq += numbers1[i] * numbers2[j];					nmatches++;					break;				}			}		}		CLAMP_PROBABILITY(matchprodfreq);		/* Sum up frequencies of matched and unmatched MCVs */		matchfreq1 = unmatchfreq1 = 0.0;		for (i = 0; i < nvalues1; i++)		{			if (hasmatch1[i])				matchfreq1 += numbers1[i];			else				unmatchfreq1 += numbers1[i];		}		CLAMP_PROBABILITY(matchfreq1);		CLAMP_PROBABILITY(unmatchfreq1);		matchfreq2 = unmatchfreq2 = 0.0;		for (i = 0; i < nvalues2; i++)		{			if (hasmatch2[i])				matchfreq2 += numbers2[i];			else				unmatchfreq2 += numbers2[i];		}		CLAMP_PROBABILITY(matchfreq2);		CLAMP_PROBABILITY(unmatchfreq2);		pfree(hasmatch1);		pfree(hasmatch2);		/*		 * Compute total frequency of non-null values that are not in the MCV		 * lists.		 */		otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;		otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;		CLAMP_PROBABILITY(otherfreq1);		CLAMP_PROBABILITY(otherfreq2);		/*		 * We can estimate the total selectivity from the point of view of		 * relation 1 as: the known selectivity for matched MCVs, plus		 * unmatched MCVs that are assumed to match against random members of		 * relation 2's non-MCV population, plus non-MCV values that are		 * assumed to match against random members of relation 2's unmatched		 * MCVs plus non-MCV values.		 */		totalsel1 = matchprodfreq;		if (nd2 > nvalues2)			totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);		if (nd2 > nmatches)			totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /				(nd2 - nmatches);		/* Same estimate from the point of view of relation 2. */		totalsel2 = matchprodfreq;		if (nd1 > nvalues1)			totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);		if (nd1 > nmatches)			totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /				(nd1 - nmatches);		/*		 * Use the smaller of the two estimates.  This can be justified in		 * essentially the same terms as given below for the no-stats case: to		 * a first approximation, we are estimating from the point of view of		 * the relation with smaller nd.		 */		selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;	}	else	{		/*		 * We do not have MCV lists for both sides.  Estimate the join		 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This		 * is plausible if we assume that the join operator is strict and the		 * non-null values are about equally distributed: a given non-null		 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows		 * of rel2, so total join rows are at most		 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of		 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it		 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression		 * with MIN() is an upper bound.  Using the MIN() means we estimate		 * from the point of view of the relation with smaller nd (since the		 * larger nd is determining the MIN).  It is reasonable to assume that		 * most tuples in this rel will have join partners, so the bound is		 * probably reasonably tight and should be taken as-is.		 *		 * XXX Can we be smarter if we have an MCV list for just one side? It		 * seems that if we assume equal distribution for the other side, we		 * end up with the same answer anyway.		 */		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;		double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);		if (nd1 > nd2)			selec /= nd1;		else			selec /= nd2;	}	if (have_mcvs1)		free_attstatsslot(vardata1.atttype, values1, nvalues1,						  numbers1, nnumbers1);	if (have_mcvs2)		free_attstatsslot(vardata2.atttype, values2, nvalues2,						  numbers2, nnumbers2);	ReleaseVariableStats(vardata1);	ReleaseVariableStats(vardata2);	CLAMP_PROBABILITY(selec);	PG_RETURN_FLOAT8((float8) selec);}/* *		neqjoinsel		- Join selectivity of "!=" */Datumneqjoinsel(PG_FUNCTION_ARGS){	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);	Oid			eqop;	float8		result;	/*	 * We want 1 - eqjoinsel() where the equality operator is the one	 * associated with this != operator, that is, its negator.	 */	eqop = get_negator(operator);	if (eqop)	{		result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,													PointerGetDatum(root),													ObjectIdGetDatum(eqop),													PointerGetDatum(args),													Int16GetDatum(jointype)));	}	else	{		/* Use default selectivity (should we raise an error instead?) */		result = DEFAULT_EQ_SEL;	}	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		scalarltjoinsel - Join selectivity of "<" and "<=" for scalars */Datumscalarltjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* *		scalargtjoinsel - Join selectivity of ">" and ">=" for scalars */Datumscalargtjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* *		regexeqjoinsel	- Join selectivity of regular-expression pattern match. */Datumregexeqjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		icregexeqjoinsel	- Join selectivity of case-insensitive regex match. */Datumicregexeqjoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		likejoinsel			- Join selectivity of LIKE pattern match. */Datumlikejoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		iclikejoinsel			- Join selectivity of ILIKE pattern match. */Datumiclikejoinsel(PG_FUNCTION_ARGS){	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* *		regexnejoinsel	- Join selectivity of regex non-match. */Datumregexnejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(regexeqjoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		icregexnejoinsel	- Join selectivity of case-insensitive regex non-match. */Datumicregexnejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(icregexeqjoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		nlikejoinsel		- Join selectivity of LIKE pattern non-match. */Datumnlikejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(likejoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *		icnlikejoinsel		- Join selectivity of ILIKE pattern non-match. */Datumicnlikejoinsel(PG_FUNCTION_ARGS){	float8		result;	result = DatumGetFloat8(iclikejoinsel(fcinfo));	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* * mergejoinscansel			- Scan selectivity of merge join. * * A merge join will stop as soon as it exhausts either input stream. * Therefore, if we can estimate the ranges of both input variables, * we can estimate how much of the input will actually be read.  This * can have a considerable impact on the cost when using indexscans. * * clause should be a clause already known to be mergejoinable. * * *leftscan is set to the fraction of the left-hand variable expected * to be scanned (0 to 1), and similarly *rightscan for the right-hand * variable. */voidmergejoinscansel(PlannerInfo *root, Node *clause,				 Selectivity *leftscan,				 Selectivity *rightscan){	Node	   *left,			   *right;	VariableStatData leftvar,				rightvar;	Oid			lefttype,				righttype;	Oid			opno,				lsortop,				rsortop,				ltop,				gtop,				leop,				revgtop,				revleop;	Datum		leftmax,				rightmax;	double		selec;	/* Set default results if we can't figure anything out. */	*leftscan = *rightscan = 1.0;	/* Deconstruct the merge clause */	if (!is_opclause(clause))		return;					/* shouldn't happen */	opno = ((OpExpr *) clause)->opno;	left = get_leftop((Expr *) clause);	right = get_rightop((Expr *) clause);	if (!right)		return;					/* shouldn't happen */	/* Look for stats for the inputs */	examine_variable(root, left, 0, &leftvar);	examine_variable(root, right, 0, &rightvar);	/* Get the direct input types of the operator */	lefttype = exprType(left);	righttype = exprType(right);	/* Verify mergejoinability and get left and right "<" operators */	if (!op_mergejoinable(opno,						  &lsortop,						  &rsortop))		goto fail;				/* shouldn't happen */	/* Try to get maximum values of both inputs */	if (!get_variable_maximum(root, &leftvar, lsortop, &leftmax))		goto fail;				/* no max available from stats */	if (!get_variable_maximum(root, &rightvar, rsortop, &rightmax))		goto fail;				/* no max available from stats */	/* Look up the "left < right" and "left > right" operators */	op_mergejoin_crossops(opno, &ltop, &gtop, NULL, NULL);	/* Look up the "left <= right" operator */	leop = get_negator(gtop);	if (!OidIsValid(leop))		goto fail;				/* insufficient info in catalogs */	/* Look up the "right > left" operator */	revgtop = get_commutator(ltop);	if (!OidIsValid(revgtop))		goto fail;				/* insufficient info in catalogs */	/* Look up the "right <= left" operator */	revleop = get_negator(revgtop);	if (!OidIsValid(revleop))		goto fail;				/* insufficient info in catalogs */	/*	 * Now, the fraction of the left variable that will be scanned is the	 * fraction that's <= the right-side maximum value.  But only believe	 * non-default estimates, else stick with our 1.0.	 */	selec = scalarineqsel(root, leop, false, &leftvar,						  rightmax, righttype);	if (selec != DEFAULT_INEQ_SEL)		*leftscan = selec;	/* And similarly for the right variable. */	selec = scalarineqsel(root, revleop, false, &rightvar,						  leftmax, lefttype);	if (selec != DEFAULT_INEQ_SEL)		*rightscan = selec;	/*	 * Only one of the two fractions can really be less than 1.0; believe the	 * smaller estimate and reset the other one to exactly 1.0.  If we get	 * exactly equal estimates (as can easily happen with self-joins), believe	 * neither.	 */	if (*leftscan > *rightscan)		*leftscan = 1.0;	else if (*leftscan < *rightscan)		*rightscan = 1.0;	else		*leftscan = *rightscan = 1.0;fail:	ReleaseVariableStats(leftvar);	ReleaseVariableStats(rightvar);}/* * Helper routine for estimate_num_groups: add an item to a list of * GroupVarInfos, but only if it's not known equal to any of the existing * entries. */typedef struct{	Node	   *var;			/* might be an expression, not just a Var */	RelOptInfo *rel;			/* relation it belongs to */	double		ndistinct;		/* # distinct values */} GroupVarInfo;static List *add_unique_group_var(PlannerInfo *root, List *varinfos,					 Node *var, VariableStatData *vardata){	GroupVarInfo *varinfo;	double		ndistinct;	ListCell   *lc;	ndistinct = get_variable_numdistinct(vardata);	/* cannot use foreach here because of possible list_delete */	lc = list_head(varinfos);	while (lc)	{		varinfo = (GroupVarInfo *) lfirst(lc);		/* must advance lc before list_delete possibly pfree's it */		lc = lnext(lc);		/* Drop exact duplicates */		if (equal(var, varinfo->var))			return varinfos;		/*		 * Drop known-equal vars, but only if they belong to different		 * relations (see comments for estimate_num_groups)		 */		if (vardata->rel != varinfo->rel &&			exprs_known_equal(root, var, varinfo->var))
selfuncs.c - 源码说明

本页面展示了「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」中的 selfuncs.c 源码文件，采用 C语言编程语言编写，共 2,328 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?