where.c
来自「sqlite 3.3.8 支持加密的版本」· C语言 代码 · 共 1,821 行 · 第 1/5 页
C
1,821 行
for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){
if( pTerm->leftCursor != pSrc->iCursor ) continue;
if( pTerm->eOperator==WO_IN ) continue;
pIdxCons[j].iColumn = pTerm->leftColumn;
pIdxCons[j].iTermOffset = i;
pIdxCons[j].op = pTerm->eOperator;
/* The direct assignment in the previous line is possible only because
** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The
** following asserts verify this fact. */
assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ );
assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT );
assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE );
assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT );
assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE );
assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH );
assert( pTerm->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) );
j++;
}
for(i=0; i<nOrderBy; i++){
Expr *pExpr = pOrderBy->a[i].pExpr;
pIdxOrderBy[i].iColumn = pExpr->iColumn;
pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder;
}
}
/* At this point, the sqlite3_index_info structure that pIdxInfo points
** to will have been initialized, either during the current invocation or
** during some prior invocation. Now we just have to customize the
** details of pIdxInfo for the current invocation and pass it to
** xBestIndex.
*/
/* The module name must be defined */
assert( pTab->azModuleArg && pTab->azModuleArg[0] );
if( pTab->pVtab==0 ){
sqlite3ErrorMsg(pParse, "undefined module %s for table %s",
pTab->azModuleArg[0], pTab->zName);
return 0.0;
}
/* Set the aConstraint[].usable fields and initialize all
** output variables to zero.
**
** aConstraint[].usable is true for constraints where the right-hand
** side contains only references to tables to the left of the current
** table. In other words, if the constraint is of the form:
**
** column = expr
**
** and we are evaluating a join, then the constraint on column is
** only valid if all tables referenced in expr occur to the left
** of the table containing column.
**
** The aConstraints[] array contains entries for all constraints
** on the current table. That way we only have to compute it once
** even though we might try to pick the best index multiple times.
** For each attempt at picking an index, the order of tables in the
** join might be different so we have to recompute the usable flag
** each time.
*/
pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint;
pUsage = pIdxInfo->aConstraintUsage;
for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){
j = pIdxCons->iTermOffset;
pTerm = &pWC->a[j];
pIdxCons->usable = (pTerm->prereqRight & notReady)==0;
}
memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint);
if( pIdxInfo->needToFreeIdxStr ){
sqlite3_free(pIdxInfo->idxStr);
}
pIdxInfo->idxStr = 0;
pIdxInfo->idxNum = 0;
pIdxInfo->needToFreeIdxStr = 0;
pIdxInfo->orderByConsumed = 0;
pIdxInfo->estimatedCost = SQLITE_BIG_DBL / 2.0;
nOrderBy = pIdxInfo->nOrderBy;
if( pIdxInfo->nOrderBy && !orderByUsable ){
*(int*)&pIdxInfo->nOrderBy = 0;
}
sqlite3SafetyOff(pParse->db);
TRACE(("xBestIndex for %s\n", pTab->zName));
TRACE_IDX_INPUTS(pIdxInfo);
rc = pTab->pVtab->pModule->xBestIndex(pTab->pVtab, pIdxInfo);
TRACE_IDX_OUTPUTS(pIdxInfo);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_NOMEM ){
sqlite3FailedMalloc();
}else {
sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc));
}
sqlite3SafetyOn(pParse->db);
}else{
rc = sqlite3SafetyOn(pParse->db);
}
*(int*)&pIdxInfo->nOrderBy = nOrderBy;
return pIdxInfo->estimatedCost;
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */
/*
** Find the best index for accessing a particular table. Return a pointer
** to the index, flags that describe how the index should be used, the
** number of equality constraints, and the "cost" for this index.
**
** The lowest cost index wins. The cost is an estimate of the amount of
** CPU and disk I/O need to process the request using the selected index.
** Factors that influence cost include:
**
** * The estimated number of rows that will be retrieved. (The
** fewer the better.)
**
** * Whether or not sorting must occur.
**
** * Whether or not there must be separate lookups in the
** index and in the main table.
**
*/
static double bestIndex(
Parse *pParse, /* The parsing context */
WhereClause *pWC, /* The WHERE clause */
struct SrcList_item *pSrc, /* The FROM clause term to search */
Bitmask notReady, /* Mask of cursors that are not available */
ExprList *pOrderBy, /* The order by clause */
Index **ppIndex, /* Make *ppIndex point to the best index */
int *pFlags, /* Put flags describing this choice in *pFlags */
int *pnEq /* Put the number of == or IN constraints here */
){
WhereTerm *pTerm;
Index *bestIdx = 0; /* Index that gives the lowest cost */
double lowestCost; /* The cost of using bestIdx */
int bestFlags = 0; /* Flags associated with bestIdx */
int bestNEq = 0; /* Best value for nEq */
int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */
Index *pProbe; /* An index we are evaluating */
int rev; /* True to scan in reverse order */
int flags; /* Flags associated with pProbe */
int nEq; /* Number of == or IN constraints */
double cost; /* Cost of using pProbe */
TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady));
lowestCost = SQLITE_BIG_DBL;
pProbe = pSrc->pTab->pIndex;
/* If the table has no indices and there are no terms in the where
** clause that refer to the ROWID, then we will never be able to do
** anything other than a full table scan on this table. We might as
** well put it first in the join order. That way, perhaps it can be
** referenced by other tables in the join.
*/
if( pProbe==0 &&
findTerm(pWC, iCur, -1, 0, WO_EQ|WO_IN|WO_LT|WO_LE|WO_GT|WO_GE,0)==0 &&
(pOrderBy==0 || !sortableByRowid(iCur, pOrderBy, &rev)) ){
*pFlags = 0;
*ppIndex = 0;
*pnEq = 0;
return 0.0;
}
/* Check for a rowid=EXPR or rowid IN (...) constraints
*/
pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0);
if( pTerm ){
Expr *pExpr;
*ppIndex = 0;
bestFlags = WHERE_ROWID_EQ;
if( pTerm->eOperator & WO_EQ ){
/* Rowid== is always the best pick. Look no further. Because only
** a single row is generated, output is always in sorted order */
*pFlags = WHERE_ROWID_EQ | WHERE_UNIQUE;
*pnEq = 1;
TRACE(("... best is rowid\n"));
return 0.0;
}else if( (pExpr = pTerm->pExpr)->pList!=0 ){
/* Rowid IN (LIST): cost is NlogN where N is the number of list
** elements. */
lowestCost = pExpr->pList->nExpr;
lowestCost *= estLog(lowestCost);
}else{
/* Rowid IN (SELECT): cost is NlogN where N is the number of rows
** in the result of the inner select. We have no way to estimate
** that value so make a wild guess. */
lowestCost = 200;
}
TRACE(("... rowid IN cost: %.9g\n", lowestCost));
}
/* Estimate the cost of a table scan. If we do not know how many
** entries are in the table, use 1 million as a guess.
*/
cost = pProbe ? pProbe->aiRowEst[0] : 1000000;
TRACE(("... table scan base cost: %.9g\n", cost));
flags = WHERE_ROWID_RANGE;
/* Check for constraints on a range of rowids in a table scan.
*/
pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0);
if( pTerm ){
if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){
flags |= WHERE_TOP_LIMIT;
cost /= 3; /* Guess that rowid<EXPR eliminates two-thirds or rows */
}
if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){
flags |= WHERE_BTM_LIMIT;
cost /= 3; /* Guess that rowid>EXPR eliminates two-thirds of rows */
}
TRACE(("... rowid range reduces cost to %.9g\n", cost));
}else{
flags = 0;
}
/* If the table scan does not satisfy the ORDER BY clause, increase
** the cost by NlogN to cover the expense of sorting. */
if( pOrderBy ){
if( sortableByRowid(iCur, pOrderBy, &rev) ){
flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE;
if( rev ){
flags |= WHERE_REVERSE;
}
}else{
cost += cost*estLog(cost);
TRACE(("... sorting increases cost to %.9g\n", cost));
}
}
if( cost<lowestCost ){
lowestCost = cost;
bestFlags = flags;
}
/* Look at each index.
*/
for(; pProbe; pProbe=pProbe->pNext){
int i; /* Loop counter */
double inMultiplier = 1;
TRACE(("... index %s:\n", pProbe->zName));
/* Count the number of columns in the index that are satisfied
** by x=EXPR constraints or x IN (...) constraints.
*/
flags = 0;
for(i=0; i<pProbe->nColumn; i++){
int j = pProbe->aiColumn[i];
pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ|WO_IN, pProbe);
if( pTerm==0 ) break;
flags |= WHERE_COLUMN_EQ;
if( pTerm->eOperator & WO_IN ){
Expr *pExpr = pTerm->pExpr;
flags |= WHERE_COLUMN_IN;
if( pExpr->pSelect!=0 ){
inMultiplier *= 25;
}else if( pExpr->pList!=0 ){
inMultiplier *= pExpr->pList->nExpr + 1;
}
}
}
cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier);
nEq = i;
if( pProbe->onError!=OE_None && (flags & WHERE_COLUMN_IN)==0
&& nEq==pProbe->nColumn ){
flags |= WHERE_UNIQUE;
}
TRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost));
/* Look for range constraints
*/
if( nEq<pProbe->nColumn ){
int j = pProbe->aiColumn[nEq];
pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe);
if( pTerm ){
flags |= WHERE_COLUMN_RANGE;
if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){
flags |= WHERE_TOP_LIMIT;
cost /= 3;
}
if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){
flags |= WHERE_BTM_LIMIT;
cost /= 3;
}
TRACE(("...... range reduces cost to %.9g\n", cost));
}
}
/* Add the additional cost of sorting if that is a factor.
*/
if( pOrderBy ){
if( (flags & WHERE_COLUMN_IN)==0 &&
isSortingIndex(pParse,pProbe,iCur,pOrderBy,nEq,&rev) ){
if( flags==0 ){
flags = WHERE_COLUMN_RANGE;
}
flags |= WHERE_ORDERBY;
if( rev ){
flags |= WHERE_REVERSE;
}
}else{
cost += cost*estLog(cost);
TRACE(("...... orderby increases cost to %.9g\n", cost));
}
}
/* Check to see if we can get away with using just the index without
** ever reading the table. If that is the case, then halve the
** cost of this index.
*/
if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){
Bitmask m = pSrc->colUsed;
int j;
for(j=0; j<pProbe->nColumn; j++){
int x = pProbe->aiColumn[j];
if( x<BMS-1 ){
m &= ~(((Bitmask)1)<<x);
}
}
if( m==0 ){
flags |= WHERE_IDX_ONLY;
cost /= 2;
TRACE(("...... idx-only reduces cost to %.9g\n", cost));
}
}
/* If this index has achieved the lowest cost so far, then use it.
*/
if( cost < lowestCost ){
bestIdx = pProbe;
lowestCost = cost;
assert( flags!=0 );
bestFlags = flags;
bestNEq = nEq;
}
}
/* Report the best result
*/
*ppIndex = bestIdx;
TRACE(("best index is %s, cost=%.9g, flags=%x, nEq=%d\n",
bestIdx ? bestIdx->zName : "(none)", lowestCost, bestFlags, bestNEq));
*pFlags = bestFlags;
*pnEq = bestNEq;
return lowestCost;
}
/*
** Disable a term in the WHERE clause. Except, do not disable the term
** if it controls a LEFT OUTER JOIN and it did not originate in the ON
** or USING clause of that join.
**
** Consider the term t2.z='ok' in the following queries:
**
** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok'
** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok'
** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok'
**
** The t2.z='ok' is disabled in the in (2) because it originates
** in the ON clause. The term is disabled in (3) because it is not part
** of a LEFT OUTER JOIN. In (1), the term is not disabled.
**
** Disabling a term causes that term to not be tested in the inner loop
** of the join. Disabling is an optimization. When terms are satisfied
** by indices, we disable them to prevent redundant tests in the inner
** loop. We would get the correct results if nothing were ever disabled,
** but joins might run a little slower. The trick is to disable as much
** as we can without disabling too much. If we disabled in (1), we'd get
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?