clausesel.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 767 行 · 第 1/2 页

C
767
字号
/*------------------------------------------------------------------------- * * clausesel.c *	  Routines to compute clause selectivities * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.90 2008/01/11 17:00:45 tgl Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include "catalog/pg_operator.h"#include "nodes/makefuncs.h"#include "optimizer/clauses.h"#include "optimizer/cost.h"#include "optimizer/pathnode.h"#include "optimizer/plancat.h"#include "parser/parsetree.h"#include "utils/fmgroids.h"#include "utils/lsyscache.h"#include "utils/selfuncs.h"/* * Data structure for accumulating info about possible range-query * clause pairs in clauselist_selectivity. */typedef struct RangeQueryClause{	struct RangeQueryClause *next;		/* next in linked list */	Node	   *var;			/* The common variable of the clauses */	bool		have_lobound;	/* found a low-bound clause yet? */	bool		have_hibound;	/* found a high-bound clause yet? */	Selectivity lobound;		/* Selectivity of a var > something clause */	Selectivity hibound;		/* Selectivity of a var < something clause */} RangeQueryClause;static void addRangeClause(RangeQueryClause **rqlist, Node *clause,			   bool varonleft, bool isLTsel, Selectivity s2);/**************************************************************************** *		ROUTINES TO COMPUTE SELECTIVITIES ****************************************************************************//* * clauselist_selectivity - *	  Compute the selectivity of an implicitly-ANDed list of boolean *	  expression clauses.  The list can be empty, in which case 1.0 *	  must be returned.  List elements may be either RestrictInfos *	  or bare expression clauses --- the former is preferred since *	  it allows caching of results. * * See clause_selectivity() for the meaning of the additional parameters. * * Our basic approach is to take the product of the selectivities of the * subclauses.	However, that's only right if the subclauses have independent * probabilities, and in reality they are often NOT independent.  So, * we want to be smarter where we can. * Currently, the only extra smarts we have is to recognize "range queries", * such as "x > 34 AND x < 42".  Clauses are recognized as possible range * query components if they are restriction opclauses whose operators have * scalarltsel() or scalargtsel() as their restriction selectivity estimator. * We pair up clauses of this form that refer to the same variable.  An * unpairable clause of this kind is simply multiplied into the selectivity * product in the normal way.  But when we find a pair, we know that the * selectivities represent the relative positions of the low and high bounds * within the column's range, so instead of figuring the selectivity as * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this, * see that hisel is the fraction of the range below the high bound, while * losel is the fraction above the low bound; so hisel can be interpreted * directly as a 0..1 value but we need to convert losel to 1-losel before * interpreting it as a value.	Then the available range is 1-losel to hisel. * However, this calculation double-excludes nulls, so really we need * hisel + losel + null_frac - 1.) * * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation * and instead use DEFAULT_RANGE_INEQ_SEL.	The same applies if the equation * yields an impossible (negative) result. * * A free side-effect is that we can recognize redundant inequalities such * as "x < 4 AND x < 5"; only the tighter constraint will be counted. * * Of course this is all very dependent on the behavior of * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. */Selectivityclauselist_selectivity(PlannerInfo *root,					   List *clauses,					   int varRelid,					   JoinType jointype){	Selectivity s1 = 1.0;	RangeQueryClause *rqlist = NULL;	ListCell   *l;	/*	 * If there's exactly one clause, then no use in trying to match up	 * pairs, so just go directly to clause_selectivity().	 */	if (list_length(clauses) == 1)		return clause_selectivity(root, (Node *) linitial(clauses),								  varRelid, jointype);	/*	 * Initial scan over clauses.  Anything that doesn't look like a potential	 * rangequery clause gets multiplied into s1 and forgotten. Anything that	 * does gets inserted into an rqlist entry.	 */	foreach(l, clauses)	{		Node	   *clause = (Node *) lfirst(l);		RestrictInfo *rinfo;		Selectivity s2;		/* Always compute the selectivity using clause_selectivity */		s2 = clause_selectivity(root, clause, varRelid, jointype);		/*		 * Check for being passed a RestrictInfo.		 *		 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or		 * 0.0; just use that rather than looking for range pairs.		 */		if (IsA(clause, RestrictInfo))		{			rinfo = (RestrictInfo *) clause;			if (rinfo->pseudoconstant)			{				s1 = s1 * s2;				continue;			}			clause = (Node *) rinfo->clause;		}		else			rinfo = NULL;		/*		 * See if it looks like a restriction clause with a pseudoconstant on		 * one side.  (Anything more complicated than that might not behave in		 * the simple way we are expecting.)  Most of the tests here can be		 * done more efficiently with rinfo than without.		 */		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)		{			OpExpr	   *expr = (OpExpr *) clause;			bool		varonleft = true;			bool		ok;			if (rinfo)			{				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&					(is_pseudo_constant_clause_relids(lsecond(expr->args),													  rinfo->right_relids) ||					 (varonleft = false,					  is_pseudo_constant_clause_relids(linitial(expr->args),													   rinfo->left_relids)));			}			else			{				ok = (NumRelids(clause) == 1) &&					(is_pseudo_constant_clause(lsecond(expr->args)) ||					 (varonleft = false,					  is_pseudo_constant_clause(linitial(expr->args))));			}			if (ok)			{				/*				 * If it's not a "<" or ">" operator, just merge the				 * selectivity in generically.	But if it's the right oprrest,				 * add the clause to rqlist for later processing.				 */				switch (get_oprrest(expr->opno))				{					case F_SCALARLTSEL:						addRangeClause(&rqlist, clause,									   varonleft, true, s2);						break;					case F_SCALARGTSEL:						addRangeClause(&rqlist, clause,									   varonleft, false, s2);						break;					default:						/* Just merge the selectivity in generically */						s1 = s1 * s2;						break;				}				continue;		/* drop to loop bottom */			}		}		/* Not the right form, so treat it generically. */		s1 = s1 * s2;	}	/*	 * Now scan the rangequery pair list.	 */	while (rqlist != NULL)	{		RangeQueryClause *rqnext;		if (rqlist->have_lobound && rqlist->have_hibound)		{			/* Successfully matched a pair of range clauses */			Selectivity s2;			/*			 * Exact equality to the default value probably means the			 * selectivity function punted.  This is not airtight but should			 * be good enough.			 */			if (rqlist->hibound == DEFAULT_INEQ_SEL ||				rqlist->lobound == DEFAULT_INEQ_SEL)			{				s2 = DEFAULT_RANGE_INEQ_SEL;			}			else			{				s2 = rqlist->hibound + rqlist->lobound - 1.0;				/* Adjust for double-exclusion of NULLs */				/* HACK: disable nulltestsel's special outer-join logic */				s2 += nulltestsel(root, IS_NULL, rqlist->var,								  varRelid, JOIN_INNER);				/*				 * A zero or slightly negative s2 should be converted into a				 * small positive value; we probably are dealing with a very				 * tight range and got a bogus result due to roundoff errors.				 * However, if s2 is very negative, then we probably have				 * default selectivity estimates on one or both sides of the				 * range that we failed to recognize above for some reason.				 */				if (s2 <= 0.0)				{					if (s2 < -0.01)					{						/*						 * No data available --- use a default estimate that						 * is small, but not real small.						 */						s2 = DEFAULT_RANGE_INEQ_SEL;					}					else					{						/*						 * It's just roundoff error; use a small positive						 * value						 */						s2 = 1.0e-10;					}				}			}			/* Merge in the selectivity of the pair of clauses */			s1 *= s2;		}		else		{			/* Only found one of a pair, merge it in generically */			if (rqlist->have_lobound)				s1 *= rqlist->lobound;			else				s1 *= rqlist->hibound;		}		/* release storage and advance */		rqnext = rqlist->next;		pfree(rqlist);		rqlist = rqnext;	}	return s1;}/* * addRangeClause --- add a new range clause for clauselist_selectivity * * Here is where we try to match up pairs of range-query clauses */static voidaddRangeClause(RangeQueryClause **rqlist, Node *clause,			   bool varonleft, bool isLTsel, Selectivity s2){	RangeQueryClause *rqelem;	Node	   *var;	bool		is_lobound;	if (varonleft)	{		var = get_leftop((Expr *) clause);		is_lobound = !isLTsel;	/* x < something is high bound */	}	else	{		var = get_rightop((Expr *) clause);		is_lobound = isLTsel;	/* something < x is low bound */	}	for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)	{		/*		 * We use full equal() here because the "var" might be a function of		 * one or more attributes of the same relation...		 */		if (!equal(var, rqelem->var))			continue;		/* Found the right group to put this clause in */		if (is_lobound)		{			if (!rqelem->have_lobound)			{				rqelem->have_lobound = true;				rqelem->lobound = s2;			}			else			{				/*------				 * We have found two similar clauses, such as				 * x < y AND x < z.				 * Keep only the more restrictive one.				 *------				 */				if (rqelem->lobound > s2)					rqelem->lobound = s2;			}		}		else		{			if (!rqelem->have_hibound)			{				rqelem->have_hibound = true;				rqelem->hibound = s2;			}			else			{				/*------				 * We have found two similar clauses, such as				 * x > y AND x > z.				 * Keep only the more restrictive one.				 *------				 */				if (rqelem->hibound > s2)					rqelem->hibound = s2;			}		}		return;	}	/* No matching var found, so make a new clause-pair data structure */	rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));	rqelem->var = var;	if (is_lobound)	{		rqelem->have_lobound = true;		rqelem->have_hibound = false;		rqelem->lobound = s2;	}	else	{		rqelem->have_lobound = false;		rqelem->have_hibound = true;		rqelem->hibound = s2;	}	rqelem->next = *rqlist;	*rqlist = rqelem;}/* * bms_is_subset_singleton * * Same result as bms_is_subset(s, bms_make_singleton(x)), * but a little faster and doesn't leak memory. * * Is this of use anywhere else?  If so move to bitmapset.c ... */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?