📄 fts3.c
字号:
dataBufferDestroy(&two); } dlrStep(&left); dlrStep(&right); } } dlrDestroy(&left); dlrDestroy(&right); dlwDestroy(&writer);}/* We have two DL_DOCIDS doclists: pLeft and pRight.** Write the intersection of these two doclists into pOut as a** DL_DOCIDS doclist.*/static void docListAndMerge( const char *pLeft, int nLeft, const char *pRight, int nRight, DataBuffer *pOut /* Write the combined doclist here */){ DLReader left, right; DLWriter writer; if( nLeft==0 || nRight==0 ) return; dlrInit(&left, DL_DOCIDS, pLeft, nLeft); dlrInit(&right, DL_DOCIDS, pRight, nRight); dlwInit(&writer, DL_DOCIDS, pOut); while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ if( dlrDocid(&left)<dlrDocid(&right) ){ dlrStep(&left); }else if( dlrDocid(&right)<dlrDocid(&left) ){ dlrStep(&right); }else{ dlwAdd(&writer, dlrDocid(&left)); dlrStep(&left); dlrStep(&right); } } dlrDestroy(&left); dlrDestroy(&right); dlwDestroy(&writer);}/* We have two DL_DOCIDS doclists: pLeft and pRight.** Write the union of these two doclists into pOut as a** DL_DOCIDS doclist.*/static void docListOrMerge( const char *pLeft, int nLeft, const char *pRight, int nRight, DataBuffer *pOut /* Write the combined doclist here */){ DLReader left, right; DLWriter writer; if( nLeft==0 ){ if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); return; } if( nRight==0 ){ dataBufferAppend(pOut, pLeft, nLeft); return; } dlrInit(&left, DL_DOCIDS, pLeft, nLeft); dlrInit(&right, DL_DOCIDS, pRight, nRight); dlwInit(&writer, DL_DOCIDS, pOut); while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ if( dlrAtEnd(&right) ){ dlwAdd(&writer, dlrDocid(&left)); dlrStep(&left); }else if( dlrAtEnd(&left) ){ dlwAdd(&writer, dlrDocid(&right)); dlrStep(&right); }else if( dlrDocid(&left)<dlrDocid(&right) ){ dlwAdd(&writer, dlrDocid(&left)); dlrStep(&left); }else if( dlrDocid(&right)<dlrDocid(&left) ){ dlwAdd(&writer, dlrDocid(&right)); dlrStep(&right); }else{ dlwAdd(&writer, dlrDocid(&left)); dlrStep(&left); dlrStep(&right); } } dlrDestroy(&left); dlrDestroy(&right); dlwDestroy(&writer);}/* We have two DL_DOCIDS doclists: pLeft and pRight.** Write into pOut as DL_DOCIDS doclist containing all documents that** occur in pLeft but not in pRight.*/static void docListExceptMerge( const char *pLeft, int nLeft, const char *pRight, int nRight, DataBuffer *pOut /* Write the combined doclist here */){ DLReader left, right; DLWriter writer; if( nLeft==0 ) return; if( nRight==0 ){ dataBufferAppend(pOut, pLeft, nLeft); return; } dlrInit(&left, DL_DOCIDS, pLeft, nLeft); dlrInit(&right, DL_DOCIDS, pRight, nRight); dlwInit(&writer, DL_DOCIDS, pOut); while( !dlrAtEnd(&left) ){ while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ dlrStep(&right); } if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ dlwAdd(&writer, dlrDocid(&left)); } dlrStep(&left); } dlrDestroy(&left); dlrDestroy(&right); dlwDestroy(&writer);}static char *string_dup_n(const char *s, int n){ char *str = sqlite3_malloc(n + 1); memcpy(str, s, n); str[n] = '\0'; return str;}/* Duplicate a string; the caller must free() the returned string. * (We don't use strdup() since it is not part of the standard C library and * may not be available everywhere.) */static char *string_dup(const char *s){ return string_dup_n(s, strlen(s));}/* Format a string, replacing each occurrence of the % character with * zDb.zName. This may be more convenient than sqlite_mprintf() * when one string is used repeatedly in a format string. * The caller must free() the returned string. */static char *string_format(const char *zFormat, const char *zDb, const char *zName){ const char *p; size_t len = 0; size_t nDb = strlen(zDb); size_t nName = strlen(zName); size_t nFullTableName = nDb+1+nName; char *result; char *r; /* first compute length needed */ for(p = zFormat ; *p ; ++p){ len += (*p=='%' ? nFullTableName : 1); } len += 1; /* for null terminator */ r = result = sqlite3_malloc(len); for(p = zFormat; *p; ++p){ if( *p=='%' ){ memcpy(r, zDb, nDb); r += nDb; *r++ = '.'; memcpy(r, zName, nName); r += nName; } else { *r++ = *p; } } *r++ = '\0'; assert( r == result + len ); return result;}static int sql_exec(sqlite3 *db, const char *zDb, const char *zName, const char *zFormat){ char *zCommand = string_format(zFormat, zDb, zName); int rc; FTSTRACE(("FTS3 sql: %s\n", zCommand)); rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); sqlite3_free(zCommand); return rc;}static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName, sqlite3_stmt **ppStmt, const char *zFormat){ char *zCommand = string_format(zFormat, zDb, zName); int rc; FTSTRACE(("FTS3 prepare: %s\n", zCommand)); rc = sqlite3_prepare_v2(db, zCommand, -1, ppStmt, NULL); sqlite3_free(zCommand); return rc;}/* end utility functions *//* Forward reference */typedef struct fulltext_vtab fulltext_vtab;/* A single term in a query is represented by an instances of** the following structure. Each word which may match against** document content is a term. Operators, like NEAR or OR, are** not terms. Query terms are organized as a flat list stored** in the Query.pTerms array.**** If the QueryTerm.nPhrase variable is non-zero, then the QueryTerm** is the first in a contiguous string of terms that are either part** of the same phrase, or connected by the NEAR operator.**** If the QueryTerm.nNear variable is non-zero, then the token is followed ** by a NEAR operator with span set to (nNear-1). For example, the ** following query:**** The QueryTerm.iPhrase variable stores the index of the token within** its phrase, indexed starting at 1, or 1 if the token is not part ** of any phrase.**** For example, the data structure used to represent the following query:**** ... MATCH 'sqlite NEAR/5 google NEAR/2 "search engine"'**** is:**** {nPhrase=4, iPhrase=1, nNear=6, pTerm="sqlite"},** {nPhrase=0, iPhrase=1, nNear=3, pTerm="google"},** {nPhrase=0, iPhrase=1, nNear=0, pTerm="search"},** {nPhrase=0, iPhrase=2, nNear=0, pTerm="engine"},**** compiling the FTS3 syntax to Query structures is done by the parseQuery()** function.*/typedef struct QueryTerm { short int nPhrase; /* How many following terms are part of the same phrase */ short int iPhrase; /* This is the i-th term of a phrase. */ short int iColumn; /* Column of the index that must match this term */ signed char nNear; /* term followed by a NEAR operator with span=(nNear-1) */ signed char isOr; /* this term is preceded by "OR" */ signed char isNot; /* this term is preceded by "-" */ signed char isPrefix; /* this term is followed by "*" */ char *pTerm; /* text of the term. '\000' terminated. malloced */ int nTerm; /* Number of bytes in pTerm[] */} QueryTerm;/* A query string is parsed into a Query structure. * * We could, in theory, allow query strings to be complicated * nested expressions with precedence determined by parentheses. * But none of the major search engines do this. (Perhaps the * feeling is that an parenthesized expression is two complex of * an idea for the average user to grasp.) Taking our lead from * the major search engines, we will allow queries to be a list * of terms (with an implied AND operator) or phrases in double-quotes, * with a single optional "-" before each non-phrase term to designate * negation and an optional OR connector. * * OR binds more tightly than the implied AND, which is what the * major search engines seem to do. So, for example: * * [one two OR three] ==> one AND (two OR three) * [one OR two three] ==> (one OR two) AND three * * A "-" before a term matches all entries that lack that term. * The "-" must occur immediately before the term with in intervening * space. This is how the search engines do it. * * A NOT term cannot be the right-hand operand of an OR. If this * occurs in the query string, the NOT is ignored: * * [one OR -two] ==> one OR two * */typedef struct Query { fulltext_vtab *pFts; /* The full text index */ int nTerms; /* Number of terms in the query */ QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ int nextIsOr; /* Set the isOr flag on the next inserted term */ int nextIsNear; /* Set the isOr flag on the next inserted term */ int nextColumn; /* Next word parsed must be in this column */ int dfltColumn; /* The default column */} Query;/*** An instance of the following structure keeps track of generated** matching-word offset information and snippets.*/typedef struct Snippet { int nMatch; /* Total number of matches */ int nAlloc; /* Space allocated for aMatch[] */ struct snippetMatch { /* One entry for each matching term */ char snStatus; /* Status flag for use while constructing snippets */ short int iCol; /* The column that contains the match */ short int iTerm; /* The index in Query.pTerms[] of the matching term */ int iToken; /* The index of the matching document token */ short int nByte; /* Number of bytes in the term */ int iStart; /* The offset to the first character of the term */ } *aMatch; /* Points to space obtained from malloc */ char *zOffset; /* Text rendering of aMatch[] */ int nOffset; /* strlen(zOffset) */ char *zSnippet; /* Snippet text */ int nSnippet; /* strlen(zSnippet) */} Snippet;typedef enum QueryType { QUERY_GENERIC, /* table scan */ QUERY_DOCID, /* lookup by docid */ QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/} QueryType;typedef enum fulltext_statement { CONTENT_INSERT_STMT, CONTENT_SELECT_STMT, CONTENT_UPDATE_STMT, CONTENT_DELETE_STMT, CONTENT_EXISTS_STMT, BLOCK_INSERT_STMT, BLOCK_SELECT_STMT, BLOCK_DELETE_STMT, BLOCK_DELETE_ALL_STMT, SEGDIR_MAX_INDEX_STMT, SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, SEGDIR_DELETE_ALL_STMT, SEGDIR_COUNT_STMT, MAX_STMT /* Always at end! */} fulltext_statement;/* These must exactly match the enum above. *//* TODO(shess): Is there some risk that a statement will be used in two** cursors at once, e.g. if a query joins a virtual table to itself?** If so perhaps we should move some of these to the cursor object.*/static const char *const fulltext_zStatement[MAX_STMT] = { /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ /* CONTENT_SELECT */ NULL, /* generated in contentSelectStatement() */ /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ /* CONTENT_DELETE */ "delete from %_content where docid = ?", /* CONTENT_EXISTS */ "select docid from %_content limit 1", /* BLOCK_INSERT */ "insert into %_segments (blockid, block) values (null, ?)", /* BLOCK_SELECT */ "select block from %_segments where blockid = ?", /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?", /* BLOCK_DELETE_ALL */ "delete from %_segments", /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", /* SEGDIR_SELECT_LEVEL */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? order by idx", /* SEGDIR_SPAN */ "select min(start_block), max(end_block) from %_segdir " " where level = ? and start_block <> 0", /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", /* NOTE(shess): The first three results of the following two ** statements must match. */ /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", /* SEGDIR_DELETE_ALL */ "delete from %_segdir", /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",};/*** A connection to a fulltext index is an instance of the following** structure. The
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -