📄 where.c
字号:
Index *pProbe; /* An index we are evaluating */ int rev; /* True to scan in reverse order */ int flags; /* Flags associated with pProbe */ int nEq; /* Number of == or IN constraints */ int eqTermMask; /* Mask of valid equality operators */ double cost; /* Cost of using pProbe */ WHERETRACE(("bestIndex: tbl=%s notReady=%llx\n", pSrc->pTab->zName, notReady)); lowestCost = SQLITE_BIG_DBL; pProbe = pSrc->pTab->pIndex; /* If the table has no indices and there are no terms in the where ** clause that refer to the ROWID, then we will never be able to do ** anything other than a full table scan on this table. We might as ** well put it first in the join order. That way, perhaps it can be ** referenced by other tables in the join. */ if( pProbe==0 && findTerm(pWC, iCur, -1, 0, WO_EQ|WO_IN|WO_LT|WO_LE|WO_GT|WO_GE,0)==0 && (pOrderBy==0 || !sortableByRowid(iCur, pOrderBy, pWC->pMaskSet, &rev)) ){ *pFlags = 0; *ppIndex = 0; *pnEq = 0; return 0.0; } /* Check for a rowid=EXPR or rowid IN (...) constraints */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); if( pTerm ){ Expr *pExpr; *ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->eOperator & WO_EQ ){ /* Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order */ *pFlags = WHERE_ROWID_EQ | WHERE_UNIQUE; *pnEq = 1; WHERETRACE(("... best is rowid\n")); return 0.0; }else if( (pExpr = pTerm->pExpr)->pList!=0 ){ /* Rowid IN (LIST): cost is NlogN where N is the number of list ** elements. */ lowestCost = pExpr->pList->nExpr; lowestCost *= estLog(lowestCost); }else{ /* Rowid IN (SELECT): cost is NlogN where N is the number of rows ** in the result of the inner select. We have no way to estimate ** that value so make a wild guess. */ lowestCost = 200; } WHERETRACE(("... rowid IN cost: %.9g\n", lowestCost)); } /* Estimate the cost of a table scan. If we do not know how many ** entries are in the table, use 1 million as a guess. */ cost = pProbe ? pProbe->aiRowEst[0] : 1000000; WHERETRACE(("... table scan base cost: %.9g\n", cost)); flags = WHERE_ROWID_RANGE; /* Check for constraints on a range of rowids in a table scan. */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0); if( pTerm ){ if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){ flags |= WHERE_TOP_LIMIT; cost /= 3; /* Guess that rowid<EXPR eliminates two-thirds or rows */ } if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){ flags |= WHERE_BTM_LIMIT; cost /= 3; /* Guess that rowid>EXPR eliminates two-thirds of rows */ } WHERETRACE(("... rowid range reduces cost to %.9g\n", cost)); }else{ flags = 0; } /* If the table scan does not satisfy the ORDER BY clause, increase ** the cost by NlogN to cover the expense of sorting. */ if( pOrderBy ){ if( sortableByRowid(iCur, pOrderBy, pWC->pMaskSet, &rev) ){ flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE; if( rev ){ flags |= WHERE_REVERSE; } }else{ cost += cost*estLog(cost); WHERETRACE(("... sorting increases cost to %.9g\n", cost)); } } if( cost<lowestCost ){ lowestCost = cost; bestFlags = flags; } /* If the pSrc table is the right table of a LEFT JOIN then we may not ** use an index to satisfy IS NULL constraints on that table. This is ** because columns might end up being NULL if the table does not match - ** a circumstance which the index cannot help us discover. Ticket #2177. */ if( (pSrc->jointype & JT_LEFT)!=0 ){ eqTermMask = WO_EQ|WO_IN; }else{ eqTermMask = WO_EQ|WO_IN|WO_ISNULL; } /* Look at each index. */ for(; pProbe; pProbe=pProbe->pNext){ int i; /* Loop counter */ double inMultiplier = 1; WHERETRACE(("... index %s:\n", pProbe->zName)); /* Count the number of columns in the index that are satisfied ** by x=EXPR constraints or x IN (...) constraints. */ flags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe); if( pTerm==0 ) break; flags |= WHERE_COLUMN_EQ; if( pTerm->eOperator & WO_IN ){ Expr *pExpr = pTerm->pExpr; flags |= WHERE_COLUMN_IN; if( pExpr->pSelect!=0 ){ inMultiplier *= 25; }else if( ALWAYS(pExpr->pList) ){ inMultiplier *= pExpr->pList->nExpr + 1; } } } cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier); nEq = i; if( pProbe->onError!=OE_None && (flags & WHERE_COLUMN_IN)==0 && nEq==pProbe->nColumn ){ flags |= WHERE_UNIQUE; } WHERETRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n",nEq,inMultiplier,cost)); /* Look for range constraints */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe); if( pTerm ){ flags |= WHERE_COLUMN_RANGE; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){ flags |= WHERE_TOP_LIMIT; cost /= 3; } if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){ flags |= WHERE_BTM_LIMIT; cost /= 3; } WHERETRACE(("...... range reduces cost to %.9g\n", cost)); } } /* Add the additional cost of sorting if that is a factor. */ if( pOrderBy ){ if( (flags & WHERE_COLUMN_IN)==0 && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev) ){ if( flags==0 ){ flags = WHERE_COLUMN_RANGE; } flags |= WHERE_ORDERBY; if( rev ){ flags |= WHERE_REVERSE; } }else{ cost += cost*estLog(cost); WHERETRACE(("...... orderby increases cost to %.9g\n", cost)); } } /* Check to see if we can get away with using just the index without ** ever reading the table. If that is the case, then halve the ** cost of this index. */ if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){ Bitmask m = pSrc->colUsed; int j; for(j=0; j<pProbe->nColumn; j++){ int x = pProbe->aiColumn[j]; if( x<BMS-1 ){ m &= ~(((Bitmask)1)<<x); } } if( m==0 ){ flags |= WHERE_IDX_ONLY; cost /= 2; WHERETRACE(("...... idx-only reduces cost to %.9g\n", cost)); } } /* If this index has achieved the lowest cost so far, then use it. */ if( flags && cost < lowestCost ){ bestIdx = pProbe; lowestCost = cost; bestFlags = flags; bestNEq = nEq; } } /* Report the best result */ *ppIndex = bestIdx; WHERETRACE(("best index is %s, cost=%.9g, flags=%x, nEq=%d\n", bestIdx ? bestIdx->zName : "(none)", lowestCost, bestFlags, bestNEq)); *pFlags = bestFlags | eqTermMask; *pnEq = bestNEq; return lowestCost;}/*** Disable a term in the WHERE clause. Except, do not disable the term** if it controls a LEFT OUTER JOIN and it did not originate in the ON** or USING clause of that join.**** Consider the term t2.z='ok' in the following queries:**** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok'** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok'** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok'**** The t2.z='ok' is disabled in the in (2) because it originates** in the ON clause. The term is disabled in (3) because it is not part** of a LEFT OUTER JOIN. In (1), the term is not disabled.**** Disabling a term causes that term to not be tested in the inner loop** of the join. Disabling is an optimization. When terms are satisfied** by indices, we disable them to prevent redundant tests in the inner** loop. We would get the correct results if nothing were ever disabled,** but joins might run a little slower. The trick is to disable as much** as we can without disabling too much. If we disabled in (1), we'd get** the wrong answer. See ticket #813.*/static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ if( pTerm && ALWAYS((pTerm->flags & TERM_CODED)==0) && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) ){ pTerm->flags |= TERM_CODED; if( pTerm->iParent>=0 ){ WhereTerm *pOther = &pTerm->pWC->a[pTerm->iParent]; if( (--pOther->nChild)==0 ){ disableTerm(pLevel, pOther); } } }}/*** Apply the affinities associated with the first n columns of index** pIdx to the values in the n registers starting at base.*/static void codeApplyAffinity(Parse *pParse, int base, int n, Index *pIdx){ if( n>0 ){ Vdbe *v = pParse->pVdbe; assert( v!=0 ); sqlite3VdbeAddOp2(v, OP_Affinity, base, n); sqlite3IndexAffinityStr(v, pIdx); sqlite3ExprCacheAffinityChange(pParse, base, n); }}/*** Generate code for a single equality term of the WHERE clause. An equality** term can be either X=expr or X IN (...). pTerm is the term to be ** coded.**** The current value for the constraint is left in register iReg.**** For a constraint of the form X=expr, the expression is evaluated and its** result is left on the stack. For constraints of the form X IN (...)** this routine sets up a loop that will iterate over all values of X.*/static int codeEqualityTerm( Parse *pParse, /* The parsing context */ WhereTerm *pTerm, /* The term of the WHERE clause to be coded */ WhereLevel *pLevel, /* When level of the FROM clause we are working on */ int iTarget /* Attempt to leave results in this register */){ Expr *pX = pTerm->pExpr; Vdbe *v = pParse->pVdbe; int iReg; /* Register holding results */ if( iTarget<=0 ){ iReg = iTarget = sqlite3GetTempReg(pParse); } if( pX->op==TK_EQ ){ iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget); }else if( pX->op==TK_ISNULL ){ iReg = iTarget; sqlite3VdbeAddOp2(v, OP_Null, 0, iReg);#ifndef SQLITE_OMIT_SUBQUERY }else{ int eType; int iTab; struct InLoop *pIn; assert( pX->op==TK_IN ); iReg = iTarget; eType = sqlite3FindInIndex(pParse, pX, 0); iTab = pX->iTable; sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeComment((v, "%.*s", pX->span.n, pX->span.z)); if( pLevel->nIn==0 ){ pLevel->nxt = sqlite3VdbeMakeLabel(v); } pLevel->nIn++; pLevel->aInLoop = sqlite3DbReallocOrFree(pParse->db, pLevel->aInLoop, sizeof(pLevel->aInLoop[0])*pLevel->nIn); pIn = pLevel->aInLoop; if( pIn ){ pIn += pLevel->nIn - 1; pIn->iCur = iTab; if( eType==IN_INDEX_ROWID ){ pIn->topAddr = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iReg); }else{ pIn->topAddr = sqlite3VdbeAddOp3(v, OP_Column, iTab, 0, iReg); } sqlite3VdbeAddOp1(v, OP_IsNull, iReg); }else{ pLevel->nIn = 0; }#endif } disableTerm(pLevel, pTerm); return iReg;}/*** Generate code that will evaluate all == and IN constraints for an** index. The values for all constraints are left on the stack.**** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c).** Suppose the WHERE clause is this: a==5 AND b IN (1,2,3) AND c>5 AND c<10** The index has as many as three equality constraints, but in this** example, the third "c" value is an inequality. So only two ** constraints are coded. This routine will generate code to evaluate** a==5 and b IN (1,2,3). The current values for a and b will be left** on the stack - a is the deepest and b the shallowest.**** In the example above nEq==2. But this subroutine works for any value** of nEq including 0. If nEq==0, this routine is nearly a no-op.** The only thing it does is allocate the pLevel->iMem memory cell.**** This routine always allocates at least one memory cell and puts** the address of that memory cell in pLevel->iMem. The code that** calls this routine will use pLevel->iMem to store the termination** key value of the loop. If one or more IN operators appear, then** this routine allocates an additional nEq memory cells for internal** use.*/static int codeAllEqualityTerms( Parse *pParse, /* Parsing context */ WhereLevel *pLevel, /* Which nested loop of the FROM we are coding */ WhereClause *pWC, /* The WHERE clause */ Bitmask notReady, /* Which parts of FROM have not yet been coded */ int nExtraReg /* Number of extra registers to allocate */){ int nEq = pLevel->nEq; /* The number of == or IN constraints to code */ Vdbe *v = pParse->pVdbe; /* The virtual machine under construction */ Index *pIdx =
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -