📄 fts1.c
字号:
return str;
}
/* Return a copy of the nul-terminated string s, produced by
 * string_dup_n().  The caller must free() the returned string.
 * (strdup() is deliberately avoided: it is not part of the standard C
 * library and may not be available everywhere.) */
static char *string_dup(const char *s){
  size_t nByte = strlen(s);
  return string_dup_n(s, nByte);
}
/* Format a string, replacing each occurrence of the % character with
 * zName.  This may be more convenient than sqlite_mprintf()
 * when one string is used repeatedly in a format string.
 *
 * zFormat - nul-terminated template; every '%' in it is a placeholder.
 * zName   - nul-terminated text substituted for each '%'.
 *
 * Returns a nul-terminated string obtained from malloc() which the
 * caller must free(), or NULL if the allocation fails. */
static char *string_format(const char *zFormat, const char *zName){
  const char *p;
  size_t len = 0;
  size_t nName = strlen(zName);
  char *result;
  char *r;

  /* First pass: compute the length needed. */
  for(p = zFormat; *p; ++p){
    len += (*p=='%' ? nName : 1);
  }
  len += 1;  /* for null terminator */

  r = result = malloc(len);
  if( result==NULL ){
    /* Out of memory: report it rather than writing through NULL. */
    return NULL;
  }

  /* Second pass: copy the template, expanding each placeholder. */
  for(p = zFormat; *p; ++p){
    if( *p=='%' ){
      memcpy(r, zName, nName);
      r += nName;
    } else {
      *r++ = *p;
    }
  }
  *r++ = '\0';

  /* Both passes must agree on the number of bytes produced. */
  assert( r == result + len );
  return result;
}
/* Expand zFormat with string_format() (each '%' becomes zName) and run
 * the resulting SQL text on db via sqlite3_exec() with no callback.
 * The temporary SQL string is freed before returning.
 * Returns the result code of sqlite3_exec(). */
static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){
  int rc;
  char *zSql = string_format(zFormat, zName);

  TRACE(("FTS1 sql: %s\n", zSql));
  rc = sqlite3_exec(db, zSql, NULL, 0, NULL);
  free(zSql);
  return rc;
}
/* Expand zFormat with string_format() (each '%' becomes zName) and
 * compile the resulting SQL text on db with sqlite3_prepare(), storing
 * the statement handle in *ppStmt.  The temporary SQL string is freed
 * before returning.  Returns the result code of sqlite3_prepare(). */
static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt,
                       const char *zFormat){
  int rc;
  char *zSql = string_format(zFormat, zName);

  TRACE(("FTS1 prepare: %s\n", zSql));
  rc = sqlite3_prepare(db, zSql, -1, ppStmt, NULL);
  free(zSql);
  return rc;
}
/* end utility functions */
/* Forward reference: struct fulltext_vtab is defined further down in
** this file, but the Query structure needs to point at it first. */
typedef struct fulltext_vtab fulltext_vtab;
/* A single term in a query is represented by an instance of
** the following structure.  Query.pTerms points to an array of these.
*/
typedef struct QueryTerm {
short int nPhrase; /* How many following terms are part of the same phrase */
short int iPhrase; /* This is the i-th term of a phrase. */
short int iColumn; /* Column of the index that must match this term */
signed char isOr; /* this term is preceded by "OR" */
signed char isNot; /* this term is preceded by "-" */
char *pTerm; /* text of the term. '\000' terminated. malloced */
int nTerm; /* Number of bytes in pTerm[] */
} QueryTerm;
/* A query string is parsed into a Query structure.
*
* We could, in theory, allow query strings to be complicated
* nested expressions with precedence determined by parentheses.
* But none of the major search engines do this. (Perhaps the
* feeling is that a parenthesized expression is too complex an
* idea for the average user to grasp.) Taking our lead from
* the major search engines, we will allow queries to be a list
* of terms (with an implied AND operator) or phrases in double-quotes,
* with a single optional "-" before each non-phrase term to designate
* negation and an optional OR connector.
*
* OR binds more tightly than the implied AND, which is what the
* major search engines seem to do. So, for example:
*
* [one two OR three] ==> one AND (two OR three)
* [one OR two three] ==> (one OR two) AND three
*
* A "-" before a term matches all entries that lack that term.
* The "-" must occur immediately before the term with no intervening
* space. This is how the search engines do it.
*
* A NOT term cannot be the right-hand operand of an OR. If this
* occurs in the query string, the NOT is ignored:
*
* [one OR -two] ==> one OR two
*
*/
typedef struct Query {
fulltext_vtab *pFts; /* The full text index */
int nTerms; /* Number of terms in the query */
QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */
int nextIsOr; /* Set the isOr flag on the next inserted term */
int nextColumn; /* Next word parsed must be in this column */
int dfltColumn; /* The default column */
} Query;
/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.  Each fulltext_cursor
** embeds one as its cached snippet for the current row.
*/
typedef struct Snippet {
int nMatch; /* Total number of matches */
int nAlloc; /* Space allocated for aMatch[] */
struct snippetMatch { /* One entry for each matching term */
char snStatus; /* Status flag for use while constructing snippets */
short int iCol; /* The column that contains the match */
short int iTerm; /* The index in Query.pTerms[] of the matching term */
short int nByte; /* Number of bytes in the term */
int iStart; /* The offset to the first character of the term */
} *aMatch; /* Points to space obtained from malloc */
char *zOffset; /* Text rendering of aMatch[] */
int nOffset; /* strlen(zOffset) */
char *zSnippet; /* Snippet text */
int nSnippet; /* strlen(zSnippet) */
} Snippet;
/* The kind of query a cursor is running; stored in
** fulltext_cursor.iCursorType.  Values at or above QUERY_FULLTEXT encode
** the searched column as an offset from QUERY_FULLTEXT.
*/
typedef enum QueryType {
QUERY_GENERIC, /* table scan */
QUERY_ROWID, /* lookup by rowid */
QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
} QueryType;
/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
** before we start aggregating into larger segments. Lower CHUNK_MAX
** means that for a given input we have more individual segments per
** term, which means more rows in the table and a bigger index (due to
** both more rows and bigger rowids). But it also reduces the average
** cost of adding new elements to the segment 0 doclist, and it seems
** to reduce the number of pages read and written during inserts. 256
** was chosen by measuring insertion times for a certain input (the first
** 10k documents of the Enron corpus), though including query performance
** in the decision may argue for a larger value.
*/
#define CHUNK_MAX 256
/* Identifiers for the prepared statements kept per virtual table.  Each
** value is used as an index into both fulltext_zStatement[] (the SQL
** text) and fulltext_vtab.pFulltextStatements[] (the compiled statement),
** so the order here must stay in step with fulltext_zStatement[].
*/
typedef enum fulltext_statement {
CONTENT_INSERT_STMT,
CONTENT_SELECT_STMT,
CONTENT_UPDATE_STMT,
CONTENT_DELETE_STMT,
TERM_SELECT_STMT,
TERM_SELECT_ALL_STMT,
TERM_INSERT_STMT,
TERM_UPDATE_STMT,
TERM_DELETE_STMT,
MAX_STMT /* Always at end! */
} fulltext_statement;
/* SQL text for each cached statement, indexed by fulltext_statement.
** These must exactly match the enum above.  Each '%' is a placeholder
** that is replaced by the virtual table name when the statement is
** prepared (sql_prepare() runs the text through string_format()).
** NULL entries have no fixed text; sql_get_statement() builds them at
** run time.
*/
/* TODO(adam): Is there some risk that a statement (in particular,
** pTermSelectStmt) will be used in two cursors at once, e.g. if a
** query joins a virtual table to itself? If so perhaps we should
** move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
/* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */
/* CONTENT_SELECT */ "select * from %_content where rowid = ?",
/* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */
/* CONTENT_DELETE */ "delete from %_content where rowid = ?",
/* TERM_SELECT */
"select rowid, doclist from %_term where term = ? and segment = ?",
/* TERM_SELECT_ALL */
"select doclist from %_term where term = ? order by segment",
/* TERM_INSERT */
"insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
/* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
/* TERM_DELETE */ "delete from %_term where rowid = ?",
};
/*
** A connection to a fulltext index is an instance of the following
** structure. The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
*/
struct fulltext_vtab {
sqlite3_vtab base; /* Base class used by SQLite core */
sqlite3 *db; /* The database connection */
const char *zName; /* virtual table name */
int nColumn; /* number of columns in virtual table */
char **azColumn; /* column names. malloced */
char **azContentColumn; /* column names in content table; malloced */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
/* Precompiled statements which we keep as long as the table is
** open.  Indexed by fulltext_statement; a NULL slot means the statement
** has not been prepared yet (sql_get_statement() fills it on demand).
*/
sqlite3_stmt *pFulltextStatements[MAX_STMT];
};
/*
** When the core wants to do a query, it creates a cursor using a
** call to xOpen. This structure is an instance of a cursor. It
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
int eof; /* True if at End Of Results */
Query q; /* Parsed query string */
Snippet snippet; /* Cached snippet for the current row */
int iColumn; /* Column being searched */
DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */
} fulltext_cursor;
/* Return the fulltext_vtab that cursor c belongs to, i.e. the cursor's
** base.pVtab pointer cast to the derived table type. */
static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  fulltext_vtab *pOwner = (fulltext_vtab *) c->base.pVtab;
  return pOwner;
}
static const sqlite3_module fulltextModule; /* forward declaration */
/* Append the nString strings in azString[] to sb, in order, with ", "
** between consecutive entries.  Nothing is appended when nString<=0. */
static void appendList(StringBuffer *sb, int nString, char **azString){
  int iStr;
  if( nString<=0 ) return;
  /* The first entry takes no separator; every later one is preceded by it. */
  append(sb, azString[0]);
  for(iStr=1; iStr<nString; ++iStr){
    append(sb, ", ");
    append(sb, azString[iStr]);
  }
}
/* Build the SQL template for inserting a row into the content table:
 *   insert into %_content (rowid, <content columns>) values (?, ?, ...)
 * One "?" is emitted for the rowid plus one per content column.  The '%'
 * is left in place for the caller to expand.  Returns sb.s, the text
 * accumulated in the local StringBuffer; sql_get_statement() free()s it
 * once the statement has been prepared.
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int nLeft;
  initStringBuffer(&sb);
  append(&sb, "insert into %_content (rowid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, ") values (?");
  /* One additional parameter marker for each content column. */
  for(nLeft=v->nColumn; nLeft>0; --nLeft){
    append(&sb, ", ?");
  }
  append(&sb, ")");
  return sb.s;
}
/* Build the SQL template for updating every column of a content row:
 *   update %_content set <col_0> = ?, <col_1> = ?, ... where rowid = ?
 * Column names come from v->azContentColumn[].  The '%' is left in place
 * for the caller to expand.  Returns sb.s, the text accumulated in the
 * local StringBuffer; sql_get_statement() free()s it once the statement
 * has been prepared.
 */
static const char *contentUpdateStatement(fulltext_vtab *v){
  StringBuffer sb;
  int iCol = 0;
  initStringBuffer(&sb);
  append(&sb, "update %_content set ");
  while( iCol<v->nColumn ){
    /* Separate assignments with a comma, except before the first one. */
    if( iCol!=0 ) append(&sb, ", ");
    append(&sb, v->azContentColumn[iCol]);
    append(&sb, " = ?");
    ++iCol;
  }
  append(&sb, " where rowid = ?");
  return sb.s;
}
/* Store in *ppStmt a ready-to-use statement for iStmt.
**
** If the statement is already cached in v->pFulltextStatements[] it is
** reset with sqlite3_reset().  Otherwise it is prepared from its SQL
** template and cached: CONTENT_INSERT_STMT and CONTENT_UPDATE_STMT use
** dynamically generated text (freed here after preparing); every other
** statement uses its fixed entry in fulltext_zStatement[].
**
** Returns SQLITE_OK on success; on failure returns the error code from
** the prepare or reset and leaves *ppStmt untouched.
*/
static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
                             sqlite3_stmt **ppStmt){
  int rc;
  assert( iStmt<MAX_STMT );
  if( v->pFulltextStatements[iStmt]!=NULL ){
    /* Already compiled: just rewind it for reuse. */
    rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
  }else{
    const char *zFixed = fulltext_zStatement[iStmt];
    const char *zSql;
    if( iStmt==CONTENT_INSERT_STMT ){
      zSql = contentInsertStatement(v);
    }else if( iStmt==CONTENT_UPDATE_STMT ){
      zSql = contentUpdateStatement(v);
    }else{
      zSql = zFixed;
    }
    rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt],
                     zSql);
    /* Generated text is ours to release; table entries are static. */
    if( zSql!=zFixed ) free((void *) zSql);
  }
  if( rc!=SQLITE_OK ) return rc;
  *ppStmt = v->pFulltextStatements[iStmt];
  return SQLITE_OK;
}
/* Step the indicated statement, handling errors SQLITE_BUSY (by
** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
** bindings to the new statement).
**
** v      - table whose statement cache holds the statement.
** iStmt  - index of the statement; on entry *ppStmt must be the cached
**          statement for this index (asserted below).
** ppStmt - in/out: replaced with the new statement if it is re-prepared.
**
** Returns SQLITE_ROW or SQLITE_DONE when a step succeeds, otherwise an
** error code.
**
** TODO(adam): We should extend this function so that it can work with
** statements declared locally, not only globally cached statements.
*/
static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
sqlite3_stmt **ppStmt){
int rc;
sqlite3_stmt *s = *ppStmt;
assert( iStmt<MAX_STMT );
assert( s==v->pFulltextStatements[iStmt] );
while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
sqlite3_stmt *pNewStmt;
/* Busy: step the same statement again (no delay or retry limit here). */
if( rc==SQLITE_BUSY ) continue;
/* Any failure other than the generic SQLITE_ERROR is returned as is. */
if( rc!=SQLITE_ERROR ) return rc;
/* The reset result tells us whether the failure was a schema change;
** anything else is reported to the caller as SQLITE_ERROR. */
rc = sqlite3_reset(s);
if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR;
/* Empty the cache slot so sql_get_statement() prepares a fresh
** statement; the old one stays reachable through s. */
v->pFulltextStatements[iStmt] = NULL; /* Still in s */
rc = sql_get_statement(v, iStmt, &pNewStmt);
if( rc!=SQLITE_OK ) goto err;
*ppStmt = pNewStmt;
/* Carry the caller's bound parameters over to the new statement. */
rc = sqlite3_transfer_bindings(s, pNewStmt);
if( rc!=SQLITE_OK ) goto err;
rc = sqlite3_finalize(s);
if( rc!=SQLITE_OK ) return rc;
/* Loop around and step the replacement statement. */
s = pNewStmt;
}
return rc;
err:
/* NOTE(review): when sql_get_statement() fails above, *ppStmt still
** holds the old statement that is finalized here, so the caller must
** not use *ppStmt after an error return - confirm against callers. */
sqlite3_finalize(s);
return rc;
}
/* Step the statement with sql_step_statement() and report SQLITE_DONE
** as SQLITE_OK; every other result code is passed through unchanged.
** Useful for statements like UPDATE, where we expect no results.
*/
static int sql_single_step_statement(fulltext_vtab *v,
                                     fulltext_statement iStmt,
                                     sqlite3_stmt **ppStmt){
  int rc = sql_step_statement(v, iStmt, ppStmt);
  if( rc==SQLITE_DONE ){
    rc = SQLITE_OK;
  }
  return rc;
}
/* insert into %_content (rowid, ...) values ([rowid], [pValues])
**
** Binds rowid to parameter 1 and pValues[0..nColumn-1] to parameters
** 2..nColumn+1 of the cached CONTENT_INSERT_STMT, then runs it.
** Returns SQLITE_OK on success, or the first error code encountered
** while fetching, binding or stepping the statement.
*/
static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
                          sqlite3_value **pValues){
  sqlite3_stmt *s;
  int iCol;
  int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
  if( rc==SQLITE_OK ){
    rc = sqlite3_bind_value(s, 1, rowid);
  }
  /* Bind column values until one fails or all are bound. */
  for(iCol=0; rc==SQLITE_OK && iCol<v->nColumn; ++iCol){
    rc = sqlite3_bind_value(s, 2+iCol, pValues[iCol]);
  }
  if( rc!=SQLITE_OK ) return rc;
  return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
}
/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
 * where rowid = [iRowid]
 *
 * Binds pValues[0..nColumn-1] to parameters 1..nColumn and iRowid to
 * parameter nColumn+1 of the cached CONTENT_UPDATE_STMT, then runs it.
 * Returns SQLITE_OK on success, or the first error code encountered
 * while fetching, binding or stepping the statement. */
static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
                          sqlite_int64 iRowid){
  sqlite3_stmt *s;
  int iCol;
  int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
  /* Bind column values until one fails or all are bound. */
  for(iCol=0; rc==SQLITE_OK && iCol<v->nColumn; ++iCol){
    rc = sqlite3_bind_value(s, 1+iCol, pValues[iCol]);
  }
  if( rc==SQLITE_OK ){
    rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
  }
  if( rc!=SQLITE_OK ) return rc;
  return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
}
/* Release an array of nString strings: free() each element of
** pString[] in order, then free() the array itself.  Nothing is
** dereferenced when nString<=0. */
void freeStringArray(int nString, const char **pString){
  const char **ppCur = pString;
  int nLeft;
  for(nLeft=nString; nLeft>0; --nLeft){
    free((void *) *ppCur);
    ++ppCur;
  }
  free((void *) pString);
}
/* select * from %_content where rowid = [iRow]
* The caller must delete the returned array and all strings in it.
*
* TODO: Perhaps we should return pointer/length strings here for consistency
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -