📄 fts3_expr.c
字号:
/*** 2008 Nov 28**** The author disclaims copyright to this source code. In place of** a legal notice, here is a blessing:**** May you do good and not evil.** May you find forgiveness for yourself and forgive others.** May you share freely, never taking more than you give.************************************************************************************ This module contains code that implements a parser for fts3 query strings** (the right-hand argument to the MATCH operator). Because the supported ** syntax is relatively simple, the whole tokenizer/parser system is** hand-coded. The public interface to this module is declared in source** code file "fts3_expr.h".*/#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)/*** By default, this module parses the legacy syntax that has been ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS** is defined, then it uses the new syntax. The differences between** the new and the old syntaxes are:**** a) The new syntax supports parenthesis. The old does not.**** b) The new syntax supports the AND and NOT operators. The old does not.**** c) The old syntax supports the "-" token qualifier. This is not ** supported by the new syntax (it is replaced by the NOT operator).**** d) When using the old syntax, the OR operator has a greater precedence** than an implicit AND. When using the new, both implicity and explicit** AND operators have a higher precedence than OR.**** If compiled with SQLITE_TEST defined, then this module exports the** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable** to zero causes the module to use the old syntax. If it is set to ** non-zero the new syntax is activated. This is so both syntaxes can** be tested using a single build of testfixture.*/#ifdef SQLITE_TESTint sqlite3_fts3_enable_parentheses = 0;#else# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS # define sqlite3_fts3_enable_parentheses 1# else# define sqlite3_fts3_enable_parentheses 0# endif#endif/*** Default span for NEAR operators.*/#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10#include "fts3_expr.h"#include "sqlite3.h"#include <ctype.h>#include <string.h>#include <assert.h>typedef struct ParseContext ParseContext;struct ParseContext { sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ const char **azCol; /* Array of column names for fts3 table */ int nCol; /* Number of entries in azCol[] */ int iDefaultCol; /* Default column to query */ sqlite3_context *pCtx; /* Write error message here */ int nNest; /* Number of nested brackets */};/*** This function is equivalent to the standard isspace() function. **** The standard isspace() can be awkward to use safely, because although it** is defined to accept an argument of type int, its behaviour when passed** an integer that falls outside of the range of the unsigned char type** is undefined (and sometimes, "undefined" means segfault). This wrapper** is defined to accept an argument of type char, and always returns 0 for** any values that fall outside of the range of the unsigned char type (i.e.** negative values).*/static int fts3isspace(char c){ return (c&0x80)==0 ? isspace(c) : 0;}/*** Extract the next token from buffer z (length n) using the tokenizer** and other information (column names etc.) in pParse. Create an Fts3Expr** structure of type FTSQUERY_PHRASE containing a phrase consisting of this** single token and set *ppExpr to point to it. If the end of the buffer is** reached before a token is found, set *ppExpr to zero. It is the** responsibility of the caller to eventually deallocate the allocated ** Fts3Expr structure (if any) by passing it to sqlite3_free().**** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation** fails.*/static int getNextToken( ParseContext *pParse, /* fts3 query parse context */ int iCol, /* Value for Fts3Phrase.iColumn */ const char *z, int n, /* Input string */ Fts3Expr **ppExpr, /* OUT: expression */ int *pnConsumed /* OUT: Number of bytes consumed */){ sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; int rc; sqlite3_tokenizer_cursor *pCursor; Fts3Expr *pRet = 0; int nConsumed = 0; rc = pModule->xOpen(pTokenizer, z, n, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken, iStart, iEnd, iPosition; int nByte; /* total space to allocate */ pCursor->pTokenizer = pTokenizer; rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)sqlite3_malloc(nByte); if( !pRet ){ rc = SQLITE_NOMEM; }else{ memset(pRet, 0, nByte); pRet->eType = FTSQUERY_PHRASE; pRet->pPhrase = (Fts3Phrase *)&pRet[1]; pRet->pPhrase->nToken = 1; pRet->pPhrase->iColumn = iCol; pRet->pPhrase->aToken[0].n = nToken; pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); if( iEnd<n && z[iEnd]=='*' ){ pRet->pPhrase->aToken[0].isPrefix = 1; iEnd++; } if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ pRet->pPhrase->isNot = 1; } } nConsumed = iEnd; } pModule->xClose(pCursor); } *pnConsumed = nConsumed; *ppExpr = pRet; return rc;}/*** Enlarge a memory allocation. If an out-of-memory allocation occurs,** then free the old allocation.*/void *fts3ReallocOrFree(void *pOrig, int nNew){ void *pRet = sqlite3_realloc(pOrig, nNew); if( !pRet ){ sqlite3_free(pOrig); } return pRet;}/*** Buffer zInput, length nInput, contains the contents of a quoted string** that appeared as part of an fts3 query expression. Neither quote character** is included in the buffer. This function attempts to tokenize the entire** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE ** containing the results.**** If successful, SQLITE_OK is returned and *ppExpr set to point at the** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set** to 0.*/static int getNextString( ParseContext *pParse, /* fts3 query parse context */ const char *zInput, int nInput, /* Input string */ Fts3Expr **ppExpr /* OUT: expression */){ sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; int rc; Fts3Expr *p = 0; sqlite3_tokenizer_cursor *pCursor = 0; char *zTemp = 0; int nTemp = 0; rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); if( rc==SQLITE_OK ){ int ii; pCursor->pTokenizer = pTokenizer; for(ii=0; rc==SQLITE_OK; ii++){ const char *zToken; int nToken, iBegin, iEnd, iPos; rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken)); zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); if( !p || !zTemp ){ goto no_mem; } if( ii==0 ){ memset(p, 0, nByte); p->pPhrase = (Fts3Phrase *)&p[1]; p->eType = FTSQUERY_PHRASE; p->pPhrase->iColumn = pParse->iDefaultCol; } p->pPhrase = (Fts3Phrase *)&p[1]; p->pPhrase->nToken = ii+1; p->pPhrase->aToken[ii].n = nToken; memcpy(&zTemp[nTemp], zToken, nToken); nTemp += nToken; if( iEnd<nInput && zInput[iEnd]=='*' ){ p->pPhrase->aToken[ii].isPrefix = 1; }else{ p->pPhrase->aToken[ii].isPrefix = 0; } } } pModule->xClose(pCursor); pCursor = 0; } if( rc==SQLITE_DONE ){ int jj; char *zNew; int nNew = 0; int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken); p = fts3ReallocOrFree(p, nByte + nTemp); if( !p ){ goto no_mem; } p->pPhrase = (Fts3Phrase *)&p[1]; zNew = &(((char *)p)[nByte]); memcpy(zNew, zTemp, nTemp); for(jj=0; jj<p->pPhrase->nToken; jj++){ p->pPhrase->aToken[jj].z = &zNew[nNew]; nNew += p->pPhrase->aToken[jj].n; } sqlite3_free(zTemp); rc = SQLITE_OK; } *ppExpr = p; return rc;no_mem: if( pCursor ){ pModule->xClose(pCursor); } sqlite3_free(zTemp); sqlite3_free(p); *ppExpr = 0; return SQLITE_NOMEM;}/*** Function getNextNode(), which is called by fts3ExprParse(), may itself** call fts3ExprParse(). So this forward declaration is required.*/static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);/*** The output variable *ppExpr is populated with an allocated Fts3Expr ** structure, or set to 0 if the end of the input buffer is reached.**** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.** If SQLITE_ERROR is returned, pContext is populated with an error message.*/static int getNextNode( ParseContext *pParse, /* fts3 query parse context */ const char *z, int n, /* Input string */ Fts3Expr **ppExpr, /* OUT: expression */ int *pnConsumed /* OUT: Number of bytes consumed */){ static const struct Fts3Keyword { char z[4]; /* Keyword text */ unsigned char n; /* Length of the keyword */ unsigned char parenOnly; /* Only valid in paren mode */ unsigned char eType; /* Keyword code */ } aKeyword[] = { { "OR" , 2, 0, FTSQUERY_OR }, { "AND", 3, 1, FTSQUERY_AND }, { "NOT", 3, 1, FTSQUERY_NOT }, { "NEAR", 4, 0, FTSQUERY_NEAR } }; int ii; int iCol; int iColLen; int rc; Fts3Expr *pRet = 0; const char *zInput = z; int nInput = n; /* Skip over any whitespace before checking for a keyword, an open or ** close bracket, or a quoted string. */ while( nInput>0 && fts3isspace(*zInput) ){ nInput--; zInput++; } if( nInput==0 ){ return SQLITE_DONE; } /* See if we are dealing with a keyword. */ for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ const struct Fts3Keyword *pKey = &aKeyword[ii]; if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ continue; } if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; int nKey = pKey->n; char cNext; /* If this is a "NEAR" keyword, check for an explicit nearness. */ if( pKey->eType==FTSQUERY_NEAR ){ assert( nKey==4 ); if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ nNear = 0; for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ nNear = nNear * 10 + (zInput[nKey] - '0'); } } } /* At this point this is probably a keyword. But for that to be true, ** the next byte must contain either whitespace, an open or close ** parenthesis, a quote character, or EOF. */ cNext = zInput[nKey]; if( fts3isspace(cNext) || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 ){ pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr)); memset(pRet, 0, sizeof(Fts3Expr)); pRet->eType = pKey->eType; pRet->nNear = nNear; *ppExpr = pRet; *pnConsumed = (zInput - z) + nKey; return SQLITE_OK; } /* Turns out that wasn't a keyword after all. This happens if the ** user has supplied a token such as "ORacle". Continue. */ } } /* Check for an open bracket. */ if( sqlite3_fts3_enable_parentheses ){ if( *zInput=='(' ){ int nConsumed; int rc; pParse->nNest++; rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } *pnConsumed = (zInput - z) + 1 + nConsumed; return rc; } /* Check for a close bracket. */ if( *zInput==')' ){ pParse->nNest--; *pnConsumed = (zInput - z) + 1; return SQLITE_DONE; } } /* See if we are dealing with a quoted phrase. If this is the case, then ** search for the closing quote and pass the whole string to getNextString() ** for processing. This is easy to do, as fts3 has no syntax for escaping ** a quote character embedded in a string. */ if( *zInput=='"' ){ for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); *pnConsumed = (zInput - z) + ii + 1; if( ii==nInput ){ return SQLITE_ERROR; } return getNextString(pParse, &zInput[1], ii-1, ppExpr); } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer ** interface. Before doing so, figure out if there is an explicit ** column specifier for the token. ** ** TODO: Strangely, it is not possible to associate a column specifier ** with a quoted phrase, only with a single token. Not sure if this was ** an implementation artifact or an intentional decision when fts3 was ** first implemented. Whichever it was, this module duplicates the ** limitation. */ iCol = pParse->iDefaultCol; iColLen = 0; for(ii=0; ii<pParse->nCol; ii++){ const char *zStr = pParse->azCol[ii]; int nStr = strlen(zStr); if( nInput>nStr && zInput[nStr]==':' && memcmp(zStr, zInput, nStr)==0 ){ iCol = ii; iColLen = ((zInput - z) + nStr + 1); break; } } rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); *pnConsumed += iColLen; return rc;}/*** The argument is an Fts3Expr structure for a binary operator (any type** except an FTSQUERY_PHRASE). Return an integer value representing the** precedence of the operator. Lower values have a higher precedence (i.e.** group more tightly). For example, in the C language, the == operator** groups more tightly than ||, and would therefore have a higher precedence.**** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS** is defined), the order of the operators in precedence from highest to** lowest is:**** NEAR** NOT** AND (including implicit ANDs)** OR**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -