where.c

来自「sqlite 3.3.8 支持加密的版本」· C语言 代码 · 共 1,821 行 · 第 1/5 页

C
1,821
字号
    for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){
      if( pTerm->leftCursor != pSrc->iCursor ) continue;
      if( pTerm->eOperator==WO_IN ) continue;
      pIdxCons[j].iColumn = pTerm->leftColumn;
      pIdxCons[j].iTermOffset = i;
      pIdxCons[j].op = pTerm->eOperator;
      /* The direct assignment in the previous line is possible only because
      ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical.  The
      ** following asserts verify this fact. */
      assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ );
      assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT );
      assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE );
      assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT );
      assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE );
      assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH );
      assert( pTerm->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) );
      j++;
    }
    for(i=0; i<nOrderBy; i++){
      Expr *pExpr = pOrderBy->a[i].pExpr;
      pIdxOrderBy[i].iColumn = pExpr->iColumn;
      pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder;
    }
  }

  /* At this point, the sqlite3_index_info structure that pIdxInfo points
  ** to will have been initialized, either during the current invocation or
  ** during some prior invocation.  Now we just have to customize the
  ** details of pIdxInfo for the current invocation and pass it to
  ** xBestIndex.
  */

  /* The module name must be defined */
  assert( pTab->azModuleArg && pTab->azModuleArg[0] );
  if( pTab->pVtab==0 ){
    sqlite3ErrorMsg(pParse, "undefined module %s for table %s",
        pTab->azModuleArg[0], pTab->zName);
    return 0.0;
  }

  /* Set the aConstraint[].usable fields and initialize all 
  ** output variables to zero.
  **
  ** aConstraint[].usable is true for constraints where the right-hand
  ** side contains only references to tables to the left of the current
  ** table.  In other words, if the constraint is of the form:
  **
  **           column = expr
  **
  ** and we are evaluating a join, then the constraint on column is 
  ** only valid if all tables referenced in expr occur to the left
  ** of the table containing column.
  **
  ** The aConstraints[] array contains entries for all constraints
  ** on the current table.  That way we only have to compute it once
  ** even though we might try to pick the best index multiple times.
  ** For each attempt at picking an index, the order of tables in the
  ** join might be different so we have to recompute the usable flag
  ** each time.
  */
  pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint;
  pUsage = pIdxInfo->aConstraintUsage;
  for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){
    j = pIdxCons->iTermOffset;
    pTerm = &pWC->a[j];
    pIdxCons->usable =  (pTerm->prereqRight & notReady)==0;
  }
  memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint);
  if( pIdxInfo->needToFreeIdxStr ){
    sqlite3_free(pIdxInfo->idxStr);
  }
  pIdxInfo->idxStr = 0;
  pIdxInfo->idxNum = 0;
  pIdxInfo->needToFreeIdxStr = 0;
  pIdxInfo->orderByConsumed = 0;
  pIdxInfo->estimatedCost = SQLITE_BIG_DBL / 2.0;
  nOrderBy = pIdxInfo->nOrderBy;
  if( pIdxInfo->nOrderBy && !orderByUsable ){
    *(int*)&pIdxInfo->nOrderBy = 0;
  }

  sqlite3SafetyOff(pParse->db);
  TRACE(("xBestIndex for %s\n", pTab->zName));
  TRACE_IDX_INPUTS(pIdxInfo);
  rc = pTab->pVtab->pModule->xBestIndex(pTab->pVtab, pIdxInfo);
  TRACE_IDX_OUTPUTS(pIdxInfo);
  if( rc!=SQLITE_OK ){
    if( rc==SQLITE_NOMEM ){
      sqlite3FailedMalloc();
    }else {
      sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc));
    }
    sqlite3SafetyOn(pParse->db);
  }else{
    rc = sqlite3SafetyOn(pParse->db);
  }
  *(int*)&pIdxInfo->nOrderBy = nOrderBy;
  return pIdxInfo->estimatedCost;
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Find the best index for accessing a particular table.  Return a pointer
** to the index, flags that describe how the index should be used, the
** number of equality constraints, and the "cost" for this index.
**
** The lowest cost index wins.  The cost is an estimate of the amount of
** CPU and disk I/O need to process the request using the selected index.
** Factors that influence cost include:
**
**    *  The estimated number of rows that will be retrieved.  (The
**       fewer the better.)
**
**    *  Whether or not sorting must occur.
**
**    *  Whether or not there must be separate lookups in the
**       index and in the main table.
**
*/
static double bestIndex(
  Parse *pParse,              /* The parsing context */
  WhereClause *pWC,           /* The WHERE clause */
  struct SrcList_item *pSrc,  /* The FROM clause term to search */
  Bitmask notReady,           /* Mask of cursors that are not available */
  ExprList *pOrderBy,         /* The order by clause */
  Index **ppIndex,            /* Make *ppIndex point to the best index */
  int *pFlags,                /* Put flags describing this choice in *pFlags */
  int *pnEq                   /* Put the number of == or IN constraints here */
){
  WhereTerm *pTerm;
  Index *bestIdx = 0;         /* Index that gives the lowest cost */
  double lowestCost;          /* The cost of using bestIdx */
  int bestFlags = 0;          /* Flags associated with bestIdx */
  int bestNEq = 0;            /* Best value for nEq */
  int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
  Index *pProbe;              /* An index we are evaluating */
  int rev;                    /* True to scan in reverse order */
  int flags;                  /* Flags associated with pProbe */
  int nEq;                    /* Number of == or IN constraints */
  double cost;                /* Cost of using pProbe */

  TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady));
  lowestCost = SQLITE_BIG_DBL;
  pProbe = pSrc->pTab->pIndex;

  /* If the table has no indices and there are no terms in the where
  ** clause that refer to the ROWID, then we will never be able to do
  ** anything other than a full table scan on this table.  We might as
  ** well put it first in the join order.  That way, perhaps it can be
  ** referenced by other tables in the join.
  */
  if( pProbe==0 &&
     findTerm(pWC, iCur, -1, 0, WO_EQ|WO_IN|WO_LT|WO_LE|WO_GT|WO_GE,0)==0 &&
     (pOrderBy==0 || !sortableByRowid(iCur, pOrderBy, &rev)) ){
    *pFlags = 0;
    *ppIndex = 0;
    *pnEq = 0;
    return 0.0;
  }

  /* Check for a rowid=EXPR or rowid IN (...) constraints
  */
  pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0);
  if( pTerm ){
    Expr *pExpr;
    *ppIndex = 0;
    bestFlags = WHERE_ROWID_EQ;
    if( pTerm->eOperator & WO_EQ ){
      /* Rowid== is always the best pick.  Look no further.  Because only
      ** a single row is generated, output is always in sorted order */
      *pFlags = WHERE_ROWID_EQ | WHERE_UNIQUE;
      *pnEq = 1;
      TRACE(("... best is rowid\n"));
      return 0.0;
    }else if( (pExpr = pTerm->pExpr)->pList!=0 ){
      /* Rowid IN (LIST): cost is NlogN where N is the number of list
      ** elements.  */
      lowestCost = pExpr->pList->nExpr;
      lowestCost *= estLog(lowestCost);
    }else{
      /* Rowid IN (SELECT): cost is NlogN where N is the number of rows
      ** in the result of the inner select.  We have no way to estimate
      ** that value so make a wild guess. */
      lowestCost = 200;
    }
    TRACE(("... rowid IN cost: %.9g\n", lowestCost));
  }

  /* Estimate the cost of a table scan.  If we do not know how many
  ** entries are in the table, use 1 million as a guess.
  */
  cost = pProbe ? pProbe->aiRowEst[0] : 1000000;
  TRACE(("... table scan base cost: %.9g\n", cost));
  flags = WHERE_ROWID_RANGE;

  /* Check for constraints on a range of rowids in a table scan.
  */
  pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0);
  if( pTerm ){
    if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){
      flags |= WHERE_TOP_LIMIT;
      cost /= 3;  /* Guess that rowid<EXPR eliminates two-thirds or rows */
    }
    if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){
      flags |= WHERE_BTM_LIMIT;
      cost /= 3;  /* Guess that rowid>EXPR eliminates two-thirds of rows */
    }
    TRACE(("... rowid range reduces cost to %.9g\n", cost));
  }else{
    flags = 0;
  }

  /* If the table scan does not satisfy the ORDER BY clause, increase
  ** the cost by NlogN to cover the expense of sorting. */
  if( pOrderBy ){
    if( sortableByRowid(iCur, pOrderBy, &rev) ){
      flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE;
      if( rev ){
        flags |= WHERE_REVERSE;
      }
    }else{
      cost += cost*estLog(cost);
      TRACE(("... sorting increases cost to %.9g\n", cost));
    }
  }
  if( cost<lowestCost ){
    lowestCost = cost;
    bestFlags = flags;
  }

  /* Look at each index.
  */
  for(; pProbe; pProbe=pProbe->pNext){
    int i;                       /* Loop counter */
    double inMultiplier = 1;

    TRACE(("... index %s:\n", pProbe->zName));

    /* Count the number of columns in the index that are satisfied
    ** by x=EXPR constraints or x IN (...) constraints.
    */
    flags = 0;
    for(i=0; i<pProbe->nColumn; i++){
      int j = pProbe->aiColumn[i];
      pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ|WO_IN, pProbe);
      if( pTerm==0 ) break;
      flags |= WHERE_COLUMN_EQ;
      if( pTerm->eOperator & WO_IN ){
        Expr *pExpr = pTerm->pExpr;
        flags |= WHERE_COLUMN_IN;
        if( pExpr->pSelect!=0 ){
          inMultiplier *= 25;
        }else if( pExpr->pList!=0 ){
          inMultiplier *= pExpr->pList->nExpr + 1;
        }
      }
    }
    cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier);
    nEq = i;
    if( pProbe->onError!=OE_None && (flags & WHERE_COLUMN_IN)==0
         && nEq==pProbe->nColumn ){
      flags |= WHERE_UNIQUE;
    }
    TRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost));

    /* Look for range constraints
    */
    if( nEq<pProbe->nColumn ){
      int j = pProbe->aiColumn[nEq];
      pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe);
      if( pTerm ){
        flags |= WHERE_COLUMN_RANGE;
        if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){
          flags |= WHERE_TOP_LIMIT;
          cost /= 3;
        }
        if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){
          flags |= WHERE_BTM_LIMIT;
          cost /= 3;
        }
        TRACE(("...... range reduces cost to %.9g\n", cost));
      }
    }

    /* Add the additional cost of sorting if that is a factor.
    */
    if( pOrderBy ){
      if( (flags & WHERE_COLUMN_IN)==0 &&
           isSortingIndex(pParse,pProbe,iCur,pOrderBy,nEq,&rev) ){
        if( flags==0 ){
          flags = WHERE_COLUMN_RANGE;
        }
        flags |= WHERE_ORDERBY;
        if( rev ){
          flags |= WHERE_REVERSE;
        }
      }else{
        cost += cost*estLog(cost);
        TRACE(("...... orderby increases cost to %.9g\n", cost));
      }
    }

    /* Check to see if we can get away with using just the index without
    ** ever reading the table.  If that is the case, then halve the
    ** cost of this index.
    */
    if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){
      Bitmask m = pSrc->colUsed;
      int j;
      for(j=0; j<pProbe->nColumn; j++){
        int x = pProbe->aiColumn[j];
        if( x<BMS-1 ){
          m &= ~(((Bitmask)1)<<x);
        }
      }
      if( m==0 ){
        flags |= WHERE_IDX_ONLY;
        cost /= 2;
        TRACE(("...... idx-only reduces cost to %.9g\n", cost));
      }
    }

    /* If this index has achieved the lowest cost so far, then use it.
    */
    if( cost < lowestCost ){
      bestIdx = pProbe;
      lowestCost = cost;
      assert( flags!=0 );
      bestFlags = flags;
      bestNEq = nEq;
    }
  }

  /* Report the best result
  */
  *ppIndex = bestIdx;
  TRACE(("best index is %s, cost=%.9g, flags=%x, nEq=%d\n",
        bestIdx ? bestIdx->zName : "(none)", lowestCost, bestFlags, bestNEq));
  *pFlags = bestFlags;
  *pnEq = bestNEq;
  return lowestCost;
}


/*
** Disable a term in the WHERE clause.  Except, do not disable the term
** if it controls a LEFT OUTER JOIN and it did not originate in the ON
** or USING clause of that join.
**
** Consider the term t2.z='ok' in the following queries:
**
**   (1)  SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok'
**   (2)  SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok'
**   (3)  SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok'
**
** The t2.z='ok' is disabled in the in (2) because it originates
** in the ON clause.  The term is disabled in (3) because it is not part
** of a LEFT OUTER JOIN.  In (1), the term is not disabled.
**
** Disabling a term causes that term to not be tested in the inner loop
** of the join.  Disabling is an optimization.  When terms are satisfied
** by indices, we disable them to prevent redundant tests in the inner
** loop.  We would get the correct results if nothing were ever disabled,
** but joins might run a little slower.  The trick is to disable as much
** as we can without disabling too much.  If we disabled in (1), we'd get

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?