📄 fts1.c
字号:
/* Format a string, replacing each occurrence of the % character with * zDb.zName. This may be more convenient than sqlite_mprintf() * when one string is used repeatedly in a format string. * The caller must free() the returned string. */static char *string_format(const char *zFormat, const char *zDb, const char *zName){ const char *p; size_t len = 0; size_t nDb = strlen(zDb); size_t nName = strlen(zName); size_t nFullTableName = nDb+1+nName; char *result; char *r; /* first compute length needed */ for(p = zFormat ; *p ; ++p){ len += (*p=='%' ? nFullTableName : 1); } len += 1; /* for null terminator */ r = result = malloc(len); for(p = zFormat; *p; ++p){ if( *p=='%' ){ memcpy(r, zDb, nDb); r += nDb; *r++ = '.'; memcpy(r, zName, nName); r += nName; } else { *r++ = *p; } } *r++ = '\0'; assert( r == result + len ); return result;}static int sql_exec(sqlite3 *db, const char *zDb, const char *zName, const char *zFormat){ char *zCommand = string_format(zFormat, zDb, zName); int rc; TRACE(("FTS1 sql: %s\n", zCommand)); rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); free(zCommand); return rc;}static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName, sqlite3_stmt **ppStmt, const char *zFormat){ char *zCommand = string_format(zFormat, zDb, zName); int rc; TRACE(("FTS1 prepare: %s\n", zCommand)); rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); free(zCommand); return rc;}/* end utility functions *//* Forward reference */typedef struct fulltext_vtab fulltext_vtab;/* A single term in a query is represented by an instances of** the following structure.*/typedef struct QueryTerm { short int nPhrase; /* How many following terms are part of the same phrase */ short int iPhrase; /* This is the i-th term of a phrase. */ short int iColumn; /* Column of the index that must match this term */ signed char isOr; /* this term is preceded by "OR" */ signed char isNot; /* this term is preceded by "-" */ char *pTerm; /* text of the term. '\000' terminated. malloced */ int nTerm; /* Number of bytes in pTerm[] */} QueryTerm;/* A query string is parsed into a Query structure. * * We could, in theory, allow query strings to be complicated * nested expressions with precedence determined by parentheses. * But none of the major search engines do this. (Perhaps the * feeling is that an parenthesized expression is two complex of * an idea for the average user to grasp.) Taking our lead from * the major search engines, we will allow queries to be a list * of terms (with an implied AND operator) or phrases in double-quotes, * with a single optional "-" before each non-phrase term to designate * negation and an optional OR connector. * * OR binds more tightly than the implied AND, which is what the * major search engines seem to do. So, for example: * * [one two OR three] ==> one AND (two OR three) * [one OR two three] ==> (one OR two) AND three * * A "-" before a term matches all entries that lack that term. * The "-" must occur immediately before the term with in intervening * space. This is how the search engines do it. * * A NOT term cannot be the right-hand operand of an OR. If this * occurs in the query string, the NOT is ignored: * * [one OR -two] ==> one OR two * */typedef struct Query { fulltext_vtab *pFts; /* The full text index */ int nTerms; /* Number of terms in the query */ QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ int nextIsOr; /* Set the isOr flag on the next inserted term */ int nextColumn; /* Next word parsed must be in this column */ int dfltColumn; /* The default column */} Query;/*** An instance of the following structure keeps track of generated** matching-word offset information and snippets.*/typedef struct Snippet { int nMatch; /* Total number of matches */ int nAlloc; /* Space allocated for aMatch[] */ struct snippetMatch { /* One entry for each matching term */ char snStatus; /* Status flag for use while constructing snippets */ short int iCol; /* The column that contains the match */ short int iTerm; /* The index in Query.pTerms[] of the matching term */ short int nByte; /* Number of bytes in the term */ int iStart; /* The offset to the first character of the term */ } *aMatch; /* Points to space obtained from malloc */ char *zOffset; /* Text rendering of aMatch[] */ int nOffset; /* strlen(zOffset) */ char *zSnippet; /* Snippet text */ int nSnippet; /* strlen(zSnippet) */} Snippet;typedef enum QueryType { QUERY_GENERIC, /* table scan */ QUERY_ROWID, /* lookup by rowid */ QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/} QueryType;/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0** before we start aggregating into larger segments. Lower CHUNK_MAX** means that for a given input we have more individual segments per** term, which means more rows in the table and a bigger index (due to** both more rows and bigger rowids). But it also reduces the average** cost of adding new elements to the segment 0 doclist, and it seems** to reduce the number of pages read and written during inserts. 256** was chosen by measuring insertion times for a certain input (first** 10k documents of Enron corpus), though including query performance** in the decision may argue for a larger value.*/#define CHUNK_MAX 256typedef enum fulltext_statement { CONTENT_INSERT_STMT, CONTENT_SELECT_STMT, CONTENT_UPDATE_STMT, CONTENT_DELETE_STMT, TERM_SELECT_STMT, TERM_SELECT_ALL_STMT, TERM_INSERT_STMT, TERM_UPDATE_STMT, TERM_DELETE_STMT, MAX_STMT /* Always at end! */} fulltext_statement;/* These must exactly match the enum above. *//* TODO(adam): Is there some risk that a statement (in particular,** pTermSelectStmt) will be used in two cursors at once, e.g. if a** query joins a virtual table to itself? If so perhaps we should** move some of these to the cursor object.*/static const char *const fulltext_zStatement[MAX_STMT] = { /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ /* CONTENT_SELECT */ "select * from %_content where rowid = ?", /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ /* CONTENT_DELETE */ "delete from %_content where rowid = ?", /* TERM_SELECT */ "select rowid, doclist from %_term where term = ? and segment = ?", /* TERM_SELECT_ALL */ "select doclist from %_term where term = ? order by segment", /* TERM_INSERT */ "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)", /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", /* TERM_DELETE */ "delete from %_term where rowid = ?",};/*** A connection to a fulltext index is an instance of the following** structure. The xCreate and xConnect methods create an instance** of this structure and xDestroy and xDisconnect free that instance.** All other methods receive a pointer to the structure as one of their** arguments.*/struct fulltext_vtab { sqlite3_vtab base; /* Base class used by SQLite core */ sqlite3 *db; /* The database connection */ const char *zDb; /* logical database name */ const char *zName; /* virtual table name */ int nColumn; /* number of columns in virtual table */ char **azColumn; /* column names. malloced */ char **azContentColumn; /* column names in content table; malloced */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ /* Precompiled statements which we keep as long as the table is ** open. */ sqlite3_stmt *pFulltextStatements[MAX_STMT];};/*** When the core wants to do a query, it create a cursor using a** call to xOpen. This structure is an instance of a cursor. It** is destroyed by xClose.*/typedef struct fulltext_cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ int eof; /* True if at End Of Results */ Query q; /* Parsed query string */ Snippet snippet; /* Cached snippet for the current row */ int iColumn; /* Column being searched */ DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */ } fulltext_cursor;static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ return (fulltext_vtab *) c->base.pVtab;}static const sqlite3_module fulltextModule; /* forward declaration *//* Append a list of strings separated by commas to a StringBuffer. */static void appendList(StringBuffer *sb, int nString, char **azString){ int i; for(i=0; i<nString; ++i){ if( i>0 ) append(sb, ", "); append(sb, azString[i]); }}/* Return a dynamically generated statement of the form * insert into %_content (rowid, ...) values (?, ...) */static const char *contentInsertStatement(fulltext_vtab *v){ StringBuffer sb; int i; initStringBuffer(&sb); append(&sb, "insert into %_content (rowid, "); appendList(&sb, v->nColumn, v->azContentColumn); append(&sb, ") values (?"); for(i=0; i<v->nColumn; ++i) append(&sb, ", ?"); append(&sb, ")"); return sb.s;}/* Return a dynamically generated statement of the form * update %_content set [col_0] = ?, [col_1] = ?, ... * where rowid = ? */static const char *contentUpdateStatement(fulltext_vtab *v){ StringBuffer sb; int i; initStringBuffer(&sb); append(&sb, "update %_content set "); for(i=0; i<v->nColumn; ++i) { if( i>0 ){ append(&sb, ", "); } append(&sb, v->azContentColumn[i]); append(&sb, " = ?"); } append(&sb, " where rowid = ?"); return sb.s;}/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.** If the indicated statement has never been prepared, it is prepared** and cached, otherwise the cached version is reset.*/static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, sqlite3_stmt **ppStmt){ assert( iStmt<MAX_STMT ); if( v->pFulltextStatements[iStmt]==NULL ){ const char *zStmt; int rc; switch( iStmt ){ case CONTENT_INSERT_STMT: zStmt = contentInsertStatement(v); break; case CONTENT_UPDATE_STMT: zStmt = contentUpdateStatement(v); break; default: zStmt = fulltext_zStatement[iStmt]; } rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], zStmt); if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt); if( rc!=SQLITE_OK ) return rc; } else { int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); if( rc!=SQLITE_OK ) return rc; } *ppStmt = v->pFulltextStatements[iStmt]; return SQLITE_OK;}/* Step the indicated statement, handling errors SQLITE_BUSY (by** retrying) and SQLITE_SCHEMA (by re-preparing and transferring** bindings to the new statement).** TODO(adam): We should extend this function so that it can work with** statements declared locally, not only globally cached statements.*/static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, sqlite3_stmt **ppStmt){ int rc; sqlite3_stmt *s = *ppStmt; assert( iStmt<MAX_STMT ); assert( s==v->pFulltextStatements[iStmt] ); while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ sqlite3_stmt *pNewStmt; if( rc==SQLITE_BUSY ) continue; if( rc!=SQLITE_ERROR ) return rc; rc = sqlite3_reset(s); if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR; v->pFulltextStatements[iStmt] = NULL; /* Still in s */ rc = sql_get_statement(v, iStmt, &pNewStmt); if( rc!=SQLITE_OK ) goto err; *ppStmt = pNewStmt; rc = sqlite3_transfer_bindings(s, pNewStmt); if( rc!=SQLITE_OK ) goto err; rc = sqlite3_finalize(s); if( rc!=SQLITE_OK ) return rc; s = pNewStmt; } return rc; err: sqlite3_finalize(s); return rc;}/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.** Useful for statements like UPDATE, where we expect no results.*/static int sql_single_step_statement(fulltext_vtab *v, fulltext_statement iStmt, sqlite3_stmt **ppStmt){ int rc = sql_step_statement(v, iStmt, ppStmt); return (rc==SQLITE_DONE) ? SQLITE_OK : rc;}/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, sqlite3_value **pValues){ sqlite3_stmt *s; int i; int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_value(s, 1, rowid); if( rc!=SQLITE_OK ) return rc; for(i=0; i<v->nColumn; ++i){ rc = sqlite3_bind_value(s, 2+i, pValues[i]); if( rc!=SQLITE_OK ) return rc; } return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);}/* update %_content set col0 = pValues[0], col1 = pValues[1], ... * where rowid = [iRowid] */static int content_update(fulltext_vtab *v, sqlite3_value **pValues, sqlite_int64 iRowid){ sqlite3_stmt *s; int i; int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); if( rc!=SQLITE_OK ) return rc; for(i=0; i<v->nColumn; ++i){ rc = sqlite3_bind_value(s, 1+i, pValues[i]); if( rc!=SQLITE_OK ) return rc; } rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);}static void freeStringArray(int nString, const char **pString){ int i; for (i=0 ; i < nString ; ++i) { if( pString[i]!=NULL ) free((void *) pString[i]); } free((void *) pString);}/* select * from %_content where rowid = [iRow] * The caller must delete the returned array and all strings in it. * null fields will be NULL in the returned array. * * TODO: Perhaps we should return pointer/length strings here for consistency * with other code which uses pointer/length. */static int content_select(fulltext_vtab *v, sqlite_int64 iRow, const char ***pValues){ sqlite3_stmt *s; const char **values; int i; int rc; *pValues = NULL; rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iRow);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -