📄 where.c
字号:
pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder; } } /* At this point, the sqlite3_index_info structure that pIdxInfo points ** to will have been initialized, either during the current invocation or ** during some prior invocation. Now we just have to customize the ** details of pIdxInfo for the current invocation and pass it to ** xBestIndex. */ /* The module name must be defined */ assert( pTab->azModuleArg && pTab->azModuleArg[0] ); if( pTab->pVtab==0 ){ sqlite3ErrorMsg(pParse, "undefined module %s for table %s", pTab->azModuleArg[0], pTab->zName); return 0.0; } /* Set the aConstraint[].usable fields and initialize all ** output variables to zero. ** ** aConstraint[].usable is true for constraints where the right-hand ** side contains only references to tables to the left of the current ** table. In other words, if the constraint is of the form: ** ** column = expr ** ** and we are evaluating a join, then the constraint on column is ** only valid if all tables referenced in expr occur to the left ** of the table containing column. ** ** The aConstraints[] array contains entries for all constraints ** on the current table. That way we only have to compute it once ** even though we might try to pick the best index multiple times. ** For each attempt at picking an index, the order of tables in the ** join might be different so we have to recompute the usable flag ** each time. */ pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; pUsage = pIdxInfo->aConstraintUsage; for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ j = pIdxCons->iTermOffset; pTerm = &pWC->a[j]; pIdxCons->usable = (pTerm->prereqRight & notReady)==0; } memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint); if( pIdxInfo->needToFreeIdxStr ){ sqlite3_free(pIdxInfo->idxStr); } pIdxInfo->idxStr = 0; pIdxInfo->idxNum = 0; pIdxInfo->needToFreeIdxStr = 0; pIdxInfo->orderByConsumed = 0; pIdxInfo->estimatedCost = SQLITE_BIG_DBL / 2.0; nOrderBy = pIdxInfo->nOrderBy; if( pIdxInfo->nOrderBy && !orderByUsable ){ *(int*)&pIdxInfo->nOrderBy = 0; } sqlite3SafetyOff(pParse->db); TRACE(("xBestIndex for %s\n", pTab->zName)); TRACE_IDX_INPUTS(pIdxInfo); rc = pTab->pVtab->pModule->xBestIndex(pTab->pVtab, pIdxInfo); TRACE_IDX_OUTPUTS(pIdxInfo); if( rc!=SQLITE_OK ){ if( rc==SQLITE_NOMEM ){ sqlite3FailedMalloc(); }else { sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc)); } sqlite3SafetyOn(pParse->db); }else{ rc = sqlite3SafetyOn(pParse->db); } *(int*)&pIdxInfo->nOrderBy = nOrderBy; return pIdxInfo->estimatedCost;}#endif /* SQLITE_OMIT_VIRTUALTABLE *//*** Find the best index for accessing a particular table. Return a pointer** to the index, flags that describe how the index should be used, the** number of equality constraints, and the "cost" for this index.**** The lowest cost index wins. The cost is an estimate of the amount of** CPU and disk I/O need to process the request using the selected index.** Factors that influence cost include:**** * The estimated number of rows that will be retrieved. (The** fewer the better.)**** * Whether or not sorting must occur.**** * Whether or not there must be separate lookups in the** index and in the main table.***/static double bestIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ Bitmask notReady, /* Mask of cursors that are not available */ ExprList *pOrderBy, /* The order by clause */ Index **ppIndex, /* Make *ppIndex point to the best index */ int *pFlags, /* Put flags describing this choice in *pFlags */ int *pnEq /* Put the number of == or IN constraints here */){ WhereTerm *pTerm; Index *bestIdx = 0; /* Index that gives the lowest cost */ double lowestCost; /* The cost of using bestIdx */ int bestFlags = 0; /* Flags associated with bestIdx */ int bestNEq = 0; /* Best value for nEq */ int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ Index *pProbe; /* An index we are evaluating */ int rev; /* True to scan in reverse order */ int flags; /* Flags associated with pProbe */ int nEq; /* Number of == or IN constraints */ double cost; /* Cost of using pProbe */ TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady)); lowestCost = SQLITE_BIG_DBL; pProbe = pSrc->pTab->pIndex; /* If the table has no indices and there are no terms in the where ** clause that refer to the ROWID, then we will never be able to do ** anything other than a full table scan on this table. We might as ** well put it first in the join order. That way, perhaps it can be ** referenced by other tables in the join. */ if( pProbe==0 && findTerm(pWC, iCur, -1, 0, WO_EQ|WO_IN|WO_LT|WO_LE|WO_GT|WO_GE,0)==0 && (pOrderBy==0 || !sortableByRowid(iCur, pOrderBy, &rev)) ){ *pFlags = 0; *ppIndex = 0; *pnEq = 0; return 0.0; } /* Check for a rowid=EXPR or rowid IN (...) constraints */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); if( pTerm ){ Expr *pExpr; *ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->eOperator & WO_EQ ){ /* Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order */ *pFlags = WHERE_ROWID_EQ | WHERE_UNIQUE; *pnEq = 1; TRACE(("... best is rowid\n")); return 0.0; }else if( (pExpr = pTerm->pExpr)->pList!=0 ){ /* Rowid IN (LIST): cost is NlogN where N is the number of list ** elements. */ lowestCost = pExpr->pList->nExpr; lowestCost *= estLog(lowestCost); }else{ /* Rowid IN (SELECT): cost is NlogN where N is the number of rows ** in the result of the inner select. We have no way to estimate ** that value so make a wild guess. */ lowestCost = 200; } TRACE(("... rowid IN cost: %.9g\n", lowestCost)); } /* Estimate the cost of a table scan. If we do not know how many ** entries are in the table, use 1 million as a guess. */ cost = pProbe ? pProbe->aiRowEst[0] : 1000000; TRACE(("... table scan base cost: %.9g\n", cost)); flags = WHERE_ROWID_RANGE; /* Check for constraints on a range of rowids in a table scan. */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0); if( pTerm ){ if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){ flags |= WHERE_TOP_LIMIT; cost /= 3; /* Guess that rowid<EXPR eliminates two-thirds or rows */ } if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){ flags |= WHERE_BTM_LIMIT; cost /= 3; /* Guess that rowid>EXPR eliminates two-thirds of rows */ } TRACE(("... rowid range reduces cost to %.9g\n", cost)); }else{ flags = 0; } /* If the table scan does not satisfy the ORDER BY clause, increase ** the cost by NlogN to cover the expense of sorting. */ if( pOrderBy ){ if( sortableByRowid(iCur, pOrderBy, &rev) ){ flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE; if( rev ){ flags |= WHERE_REVERSE; } }else{ cost += cost*estLog(cost); TRACE(("... sorting increases cost to %.9g\n", cost)); } } if( cost<lowestCost ){ lowestCost = cost; bestFlags = flags; } /* Look at each index. */ for(; pProbe; pProbe=pProbe->pNext){ int i; /* Loop counter */ double inMultiplier = 1; TRACE(("... index %s:\n", pProbe->zName)); /* Count the number of columns in the index that are satisfied ** by x=EXPR constraints or x IN (...) constraints. */ flags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ|WO_IN, pProbe); if( pTerm==0 ) break; flags |= WHERE_COLUMN_EQ; if( pTerm->eOperator & WO_IN ){ Expr *pExpr = pTerm->pExpr; flags |= WHERE_COLUMN_IN; if( pExpr->pSelect!=0 ){ inMultiplier *= 25; }else if( pExpr->pList!=0 ){ inMultiplier *= pExpr->pList->nExpr + 1; } } } cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier); nEq = i; if( pProbe->onError!=OE_None && (flags & WHERE_COLUMN_IN)==0 && nEq==pProbe->nColumn ){ flags |= WHERE_UNIQUE; } TRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost)); /* Look for range constraints */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe); if( pTerm ){ flags |= WHERE_COLUMN_RANGE; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){ flags |= WHERE_TOP_LIMIT; cost /= 3; } if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){ flags |= WHERE_BTM_LIMIT; cost /= 3; } TRACE(("...... range reduces cost to %.9g\n", cost)); } } /* Add the additional cost of sorting if that is a factor. */ if( pOrderBy ){ if( (flags & WHERE_COLUMN_IN)==0 && isSortingIndex(pParse,pProbe,iCur,pOrderBy,nEq,&rev) ){ if( flags==0 ){ flags = WHERE_COLUMN_RANGE; } flags |= WHERE_ORDERBY; if( rev ){ flags |= WHERE_REVERSE; } }else{ cost += cost*estLog(cost); TRACE(("...... orderby increases cost to %.9g\n", cost)); } } /* Check to see if we can get away with using just the index without ** ever reading the table. If that is the case, then halve the ** cost of this index. */ if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){ Bitmask m = pSrc->colUsed; int j; for(j=0; j<pProbe->nColumn; j++){ int x = pProbe->aiColumn[j]; if( x<BMS-1 ){ m &= ~(((Bitmask)1)<<x); } } if( m==0 ){ flags |= WHERE_IDX_ONLY; cost /= 2; TRACE(("...... idx-only reduces cost to %.9g\n", cost)); } } /* If this index has achieved the lowest cost so far, then use it. */ if( cost < lowestCost ){ bestIdx = pProbe; lowestCost = cost; assert( flags!=0 ); bestFlags = flags; bestNEq = nEq; } } /* Report the best result */ *ppIndex = bestIdx; TRACE(("best index is %s, cost=%.9g, flags=%x, nEq=%d\n", bestIdx ? bestIdx->zName : "(none)", lowestCost, bestFlags, bestNEq)); *pFlags = bestFlags; *pnEq = bestNEq; return lowestCost;}/*** Disable a term in the WHERE clause. Except, do not disable the term** if it controls a LEFT OUTER JOIN and it did not originate in the ON** or USING clause of that join.**** Consider the term t2.z='ok' in the following queries:**** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok'** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok'** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok'**** The t2.z='ok' is disabled in the in (2) because it originates** in the ON clause. The term is disabled in (3) because it is not part** of a LEFT OUTER JOIN. In (1), the term is not disabled.**** Disabling a term causes that term to not be tested in the inner loop** of the join. Disabling is an optimization. When terms are satisfied** by indices, we disable them to prevent redundant tests in the inner** loop. We would get the correct results if nothing were ever disabled,** but joins might run a little slower. The trick is to disable as much** as we can without disabling too much. If we disabled in (1), we'd get** the wrong answer. See ticket #813.*/static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ if( pTerm && (pTerm->flags & TERM_CODED)==0 && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) ){ pTerm->flags |= TERM_CODED; if( pTerm->iParent>=0 ){ WhereTerm *pOther = &pTerm->pWC->a[pTerm->iParent]; if( (--pOther->nChild)==0 ){ disableTerm(pLevel, pOther); } } }}/*** Generate code that builds a probe for an index. Details:**** * Check the top nColumn entries on the stack. If any** of those entries are NULL, jump immediately to brk,** which is the loop exit, since no index entry will match** if any part of the key is NULL. Pop (nColumn+nExtra) ** elements from the stack.**** * Construct a probe entry from the top nColumn entries in** the stack with affinities appropriate for index pIdx.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -