⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fts3_expr.c

📁 sqlite最新源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/*** 2008 Nov 28**** The author disclaims copyright to this source code.  In place of** a legal notice, here is a blessing:****    May you do good and not evil.**    May you find forgiveness for yourself and forgive others.**    May you share freely, never taking more than you give.************************************************************************************ This module contains code that implements a parser for fts3 query strings** (the right-hand argument to the MATCH operator). Because the supported ** syntax is relatively simple, the whole tokenizer/parser system is** hand-coded. The public interface to this module is declared in source** code file "fts3_expr.h".*/#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)/*** By default, this module parses the legacy syntax that has been ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS** is defined, then it uses the new syntax. The differences between** the new and the old syntaxes are:****  a) The new syntax supports parenthesis. The old does not.****  b) The new syntax supports the AND and NOT operators. The old does not.****  c) The old syntax supports the "-" token qualifier. This is not **     supported by the new syntax (it is replaced by the NOT operator).****  d) When using the old syntax, the OR operator has a greater precedence**     than an implicit AND. When using the new, both implicity and explicit**     AND operators have a higher precedence than OR.**** If compiled with SQLITE_TEST defined, then this module exports the** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable** to zero causes the module to use the old syntax. If it is set to ** non-zero the new syntax is activated. This is so both syntaxes can** be tested using a single build of testfixture.*/#ifdef SQLITE_TESTint sqlite3_fts3_enable_parentheses = 0;#else# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS #  define sqlite3_fts3_enable_parentheses 1# else#  define sqlite3_fts3_enable_parentheses 0# endif#endif/*** Default span for NEAR operators.*/#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10#include "fts3_expr.h"#include "sqlite3.h"#include <ctype.h>#include <string.h>#include <assert.h>typedef struct ParseContext ParseContext;struct ParseContext {  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */  const char **azCol;                 /* Array of column names for fts3 table */  int nCol;                           /* Number of entries in azCol[] */  int iDefaultCol;                    /* Default column to query */  sqlite3_context *pCtx;              /* Write error message here */  int nNest;                          /* Number of nested brackets */};/*** This function is equivalent to the standard isspace() function. **** The standard isspace() can be awkward to use safely, because although it** is defined to accept an argument of type int, its behaviour when passed** an integer that falls outside of the range of the unsigned char type** is undefined (and sometimes, "undefined" means segfault). This wrapper** is defined to accept an argument of type char, and always returns 0 for** any values that fall outside of the range of the unsigned char type (i.e.** negative values).*/static int fts3isspace(char c){  return (c&0x80)==0 ? isspace(c) : 0;}/*** Extract the next token from buffer z (length n) using the tokenizer** and other information (column names etc.) in pParse. Create an Fts3Expr** structure of type FTSQUERY_PHRASE containing a phrase consisting of this** single token and set *ppExpr to point to it. If the end of the buffer is** reached before a token is found, set *ppExpr to zero. It is the** responsibility of the caller to eventually deallocate the allocated ** Fts3Expr structure (if any) by passing it to sqlite3_free().**** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation** fails.*/static int getNextToken(  ParseContext *pParse,                   /* fts3 query parse context */  int iCol,                               /* Value for Fts3Phrase.iColumn */  const char *z, int n,                   /* Input string */  Fts3Expr **ppExpr,                      /* OUT: expression */  int *pnConsumed                         /* OUT: Number of bytes consumed */){  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;  int rc;  sqlite3_tokenizer_cursor *pCursor;  Fts3Expr *pRet = 0;  int nConsumed = 0;  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);  if( rc==SQLITE_OK ){    const char *zToken;    int nToken, iStart, iEnd, iPosition;    int nByte;                               /* total space to allocate */    pCursor->pTokenizer = pTokenizer;    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);    if( rc==SQLITE_OK ){      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;      pRet = (Fts3Expr *)sqlite3_malloc(nByte);      if( !pRet ){        rc = SQLITE_NOMEM;      }else{        memset(pRet, 0, nByte);        pRet->eType = FTSQUERY_PHRASE;        pRet->pPhrase = (Fts3Phrase *)&pRet[1];        pRet->pPhrase->nToken = 1;        pRet->pPhrase->iColumn = iCol;        pRet->pPhrase->aToken[0].n = nToken;        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);        if( iEnd<n && z[iEnd]=='*' ){          pRet->pPhrase->aToken[0].isPrefix = 1;          iEnd++;        }        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){          pRet->pPhrase->isNot = 1;        }      }      nConsumed = iEnd;    }    pModule->xClose(pCursor);  }    *pnConsumed = nConsumed;  *ppExpr = pRet;  return rc;}/*** Enlarge a memory allocation.  If an out-of-memory allocation occurs,** then free the old allocation.*/void *fts3ReallocOrFree(void *pOrig, int nNew){  void *pRet = sqlite3_realloc(pOrig, nNew);  if( !pRet ){    sqlite3_free(pOrig);  }  return pRet;}/*** Buffer zInput, length nInput, contains the contents of a quoted string** that appeared as part of an fts3 query expression. Neither quote character** is included in the buffer. This function attempts to tokenize the entire** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE ** containing the results.**** If successful, SQLITE_OK is returned and *ppExpr set to point at the** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set** to 0.*/static int getNextString(  ParseContext *pParse,                   /* fts3 query parse context */  const char *zInput, int nInput,         /* Input string */  Fts3Expr **ppExpr                       /* OUT: expression */){  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;  int rc;  Fts3Expr *p = 0;  sqlite3_tokenizer_cursor *pCursor = 0;  char *zTemp = 0;  int nTemp = 0;  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);  if( rc==SQLITE_OK ){    int ii;    pCursor->pTokenizer = pTokenizer;    for(ii=0; rc==SQLITE_OK; ii++){      const char *zToken;      int nToken, iBegin, iEnd, iPos;      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);      if( rc==SQLITE_OK ){        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);        p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken));        zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);        if( !p || !zTemp ){          goto no_mem;        }        if( ii==0 ){          memset(p, 0, nByte);          p->pPhrase = (Fts3Phrase *)&p[1];          p->eType = FTSQUERY_PHRASE;          p->pPhrase->iColumn = pParse->iDefaultCol;        }        p->pPhrase = (Fts3Phrase *)&p[1];        p->pPhrase->nToken = ii+1;        p->pPhrase->aToken[ii].n = nToken;        memcpy(&zTemp[nTemp], zToken, nToken);        nTemp += nToken;        if( iEnd<nInput && zInput[iEnd]=='*' ){          p->pPhrase->aToken[ii].isPrefix = 1;        }else{          p->pPhrase->aToken[ii].isPrefix = 0;        }      }    }    pModule->xClose(pCursor);    pCursor = 0;  }  if( rc==SQLITE_DONE ){    int jj;    char *zNew;    int nNew = 0;    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);    nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);    p = fts3ReallocOrFree(p, nByte + nTemp);    if( !p ){      goto no_mem;    }    p->pPhrase = (Fts3Phrase *)&p[1];    zNew = &(((char *)p)[nByte]);    memcpy(zNew, zTemp, nTemp);    for(jj=0; jj<p->pPhrase->nToken; jj++){      p->pPhrase->aToken[jj].z = &zNew[nNew];      nNew += p->pPhrase->aToken[jj].n;    }    sqlite3_free(zTemp);    rc = SQLITE_OK;  }  *ppExpr = p;  return rc;no_mem:  if( pCursor ){    pModule->xClose(pCursor);  }  sqlite3_free(zTemp);  sqlite3_free(p);  *ppExpr = 0;  return SQLITE_NOMEM;}/*** Function getNextNode(), which is called by fts3ExprParse(), may itself** call fts3ExprParse(). So this forward declaration is required.*/static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);/*** The output variable *ppExpr is populated with an allocated Fts3Expr ** structure, or set to 0 if the end of the input buffer is reached.**** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.** If SQLITE_ERROR is returned, pContext is populated with an error message.*/static int getNextNode(  ParseContext *pParse,                   /* fts3 query parse context */  const char *z, int n,                   /* Input string */  Fts3Expr **ppExpr,                      /* OUT: expression */  int *pnConsumed                         /* OUT: Number of bytes consumed */){  static const struct Fts3Keyword {    char z[4];                            /* Keyword text */    unsigned char n;                      /* Length of the keyword */    unsigned char parenOnly;              /* Only valid in paren mode */    unsigned char eType;                  /* Keyword code */  } aKeyword[] = {    { "OR" ,  2, 0, FTSQUERY_OR   },    { "AND",  3, 1, FTSQUERY_AND  },    { "NOT",  3, 1, FTSQUERY_NOT  },    { "NEAR", 4, 0, FTSQUERY_NEAR }  };  int ii;  int iCol;  int iColLen;  int rc;  Fts3Expr *pRet = 0;  const char *zInput = z;  int nInput = n;  /* Skip over any whitespace before checking for a keyword, an open or  ** close bracket, or a quoted string.   */  while( nInput>0 && fts3isspace(*zInput) ){    nInput--;    zInput++;  }  if( nInput==0 ){    return SQLITE_DONE;  }  /* See if we are dealing with a keyword. */  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){    const struct Fts3Keyword *pKey = &aKeyword[ii];    if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){      continue;    }    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;      int nKey = pKey->n;      char cNext;      /* If this is a "NEAR" keyword, check for an explicit nearness. */      if( pKey->eType==FTSQUERY_NEAR ){        assert( nKey==4 );        if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){          nNear = 0;          for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){            nNear = nNear * 10 + (zInput[nKey] - '0');          }        }      }      /* At this point this is probably a keyword. But for that to be true,      ** the next byte must contain either whitespace, an open or close      ** parenthesis, a quote character, or EOF.       */      cNext = zInput[nKey];      if( fts3isspace(cNext)        || cNext=='"' || cNext=='(' || cNext==')' || cNext==0      ){        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));        memset(pRet, 0, sizeof(Fts3Expr));        pRet->eType = pKey->eType;        pRet->nNear = nNear;        *ppExpr = pRet;        *pnConsumed = (zInput - z) + nKey;        return SQLITE_OK;      }      /* Turns out that wasn't a keyword after all. This happens if the      ** user has supplied a token such as "ORacle". Continue.      */    }  }  /* Check for an open bracket. */  if( sqlite3_fts3_enable_parentheses ){    if( *zInput=='(' ){      int nConsumed;      int rc;      pParse->nNest++;      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);      if( rc==SQLITE_OK && !*ppExpr ){        rc = SQLITE_DONE;      }      *pnConsumed = (zInput - z) + 1 + nConsumed;      return rc;    }      /* Check for a close bracket. */    if( *zInput==')' ){      pParse->nNest--;      *pnConsumed = (zInput - z) + 1;      return SQLITE_DONE;    }  }  /* See if we are dealing with a quoted phrase. If this is the case, then  ** search for the closing quote and pass the whole string to getNextString()  ** for processing. This is easy to do, as fts3 has no syntax for escaping  ** a quote character embedded in a string.  */  if( *zInput=='"' ){    for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);    *pnConsumed = (zInput - z) + ii + 1;    if( ii==nInput ){      return SQLITE_ERROR;    }    return getNextString(pParse, &zInput[1], ii-1, ppExpr);  }  /* If control flows to this point, this must be a regular token, or   ** the end of the input. Read a regular token using the sqlite3_tokenizer  ** interface. Before doing so, figure out if there is an explicit  ** column specifier for the token.   **  ** TODO: Strangely, it is not possible to associate a column specifier  ** with a quoted phrase, only with a single token. Not sure if this was  ** an implementation artifact or an intentional decision when fts3 was  ** first implemented. Whichever it was, this module duplicates the   ** limitation.  */  iCol = pParse->iDefaultCol;  iColLen = 0;  for(ii=0; ii<pParse->nCol; ii++){    const char *zStr = pParse->azCol[ii];    int nStr = strlen(zStr);    if( nInput>nStr && zInput[nStr]==':' && memcmp(zStr, zInput, nStr)==0 ){      iCol = ii;      iColLen = ((zInput - z) + nStr + 1);      break;    }  }  rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);  *pnConsumed += iColLen;  return rc;}/*** The argument is an Fts3Expr structure for a binary operator (any type** except an FTSQUERY_PHRASE). Return an integer value representing the** precedence of the operator. Lower values have a higher precedence (i.e.** group more tightly). For example, in the C language, the == operator** groups more tightly than ||, and would therefore have a higher precedence.**** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS** is defined), the order of the operators in precedence from highest to** lowest is:****   NEAR**   NOT**   AND (including implicit ANDs)**   OR**

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -