/* File: fulltext.c */
/* (code-viewer UI label, not part of the source: "Font size:") */
*r++ = '\0'; assert( r == result + len ); return result;}static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){ char *zCommand = string_format(zFormat, zName); int rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); free(zCommand); return rc;}static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt, const char *zFormat){ char *zCommand = string_format(zFormat, zName); int rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); free(zCommand); return rc;}/* end utility functions */#define QUERY_GENERIC 0#define QUERY_FULLTEXT 1#define CHUNK_MAX 1024typedef enum fulltext_statement { CONTENT_INSERT_STMT, CONTENT_SELECT_STMT, CONTENT_DELETE_STMT, TERM_SELECT_STMT, TERM_CHUNK_SELECT_STMT, TERM_INSERT_STMT, TERM_UPDATE_STMT, TERM_DELETE_STMT, MAX_STMT /* Always at end! */} fulltext_statement;/* These must exactly match the enum above. *//* TODO(adam): Is there some risk that a statement (in particular,** pTermSelectStmt) will be used in two cursors at once, e.g. if a** query joins a virtual table to itself? If so perhaps we should** move some of these to the cursor object.*/static const char *fulltext_zStatement[MAX_STMT] = { /* CONTENT_INSERT */ "insert into %_content (rowid, content) values (?, ?)", /* CONTENT_SELECT */ "select content from %_content where rowid = ?", /* CONTENT_DELETE */ "delete from %_content where rowid = ?", /* TERM_SELECT */ "select rowid, doclist from %_term where term = ? and first = ?", /* TERM_CHUNK_SELECT */ "select max(first) from %_term where term = ? and first <= ?", /* TERM_INSERT */ "insert into %_term (term, first, doclist) values (?, ?, ?)", /* TERM_UPDATE */ "update %_term set doclist = ? 
where rowid = ?", /* TERM_DELETE */ "delete from %_term where rowid = ?",};typedef struct fulltext_vtab { sqlite3_vtab base; sqlite3 *db; const char *zName; /* virtual table name */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ /* Precompiled statements which we keep as long as the table is ** open. */ sqlite3_stmt *pFulltextStatements[MAX_STMT];} fulltext_vtab;typedef struct fulltext_cursor { sqlite3_vtab_cursor base; int iCursorType; /* QUERY_GENERIC or QUERY_FULLTEXT */ sqlite3_stmt *pStmt; int eof; /* The following is used only when iCursorType == QUERY_FULLTEXT. */ DocListReader result;} fulltext_cursor;static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ return (fulltext_vtab *) c->base.pVtab;}static sqlite3_module fulltextModule; /* forward declaration *//* Puts a freshly-prepared statement determined by iStmt in *ppStmt.** If the indicated statement has never been prepared, it is prepared** and cached, otherwise the cached version is reset.*/static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, sqlite3_stmt **ppStmt){ assert( iStmt<MAX_STMT ); if( v->pFulltextStatements[iStmt]==NULL ){ int rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt], fulltext_zStatement[iStmt]); if( rc!=SQLITE_OK ) return rc; } else { int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); if( rc!=SQLITE_OK ) return rc; } *ppStmt = v->pFulltextStatements[iStmt]; return SQLITE_OK;}/* Step the indicated statement, handling errors SQLITE_BUSY (by** retrying) and SQLITE_SCHEMA (by re-preparing and transferring** bindings to the new statement).** TODO(adam): We should extend this function so that it can work with** statements declared locally, not only globally cached statements.*/static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, sqlite3_stmt **ppStmt){ int rc; sqlite3_stmt *s = *ppStmt; assert( iStmt<MAX_STMT ); assert( s==v->pFulltextStatements[iStmt] ); while( (rc=sqlite3_step(s))!=SQLITE_DONE && 
rc!=SQLITE_ROW ){ sqlite3_stmt *pNewStmt; if( rc==SQLITE_BUSY ) continue; if( rc!=SQLITE_ERROR ) return rc; rc = sqlite3_reset(s); if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR; v->pFulltextStatements[iStmt] = NULL; /* Still in s */ rc = sql_get_statement(v, iStmt, &pNewStmt); if( rc!=SQLITE_OK ) goto err; *ppStmt = pNewStmt; rc = sqlite3_transfer_bindings(s, pNewStmt); if( rc!=SQLITE_OK ) goto err; rc = sqlite3_finalize(s); if( rc!=SQLITE_OK ) return rc; s = pNewStmt; } return rc; err: sqlite3_finalize(s); return rc;}/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.** Useful for statements like UPDATE, where we expect no results.*/static int sql_single_step_statement(fulltext_vtab *v, fulltext_statement iStmt, sqlite3_stmt **ppStmt){ int rc = sql_step_statement(v, iStmt, ppStmt); return (rc==SQLITE_DONE) ? SQLITE_OK : rc;}/* insert into %_content (rowid, content) values ([rowid], [zContent]) */static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, const char *zContent, int nContent){ sqlite3_stmt *s; int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_value(s, 1, rowid); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_text(s, 2, zContent, nContent, SQLITE_STATIC); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);}/* select content from %_content where rowid = [iRow] * The caller must delete the returned string. */static int content_select(fulltext_vtab *v, sqlite_int64 iRow, char **pzContent){ sqlite3_stmt *s; int rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iRow); if( rc!=SQLITE_OK ) return rc; rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); if( rc!=SQLITE_ROW ) return rc; *pzContent = string_dup((const char *)sqlite3_column_text(s, 0)); /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain locked. 
*/ rc = sqlite3_step(s); if( rc==SQLITE_DONE ) return SQLITE_OK; free(*pzContent); return rc;}/* delete from %_content where rowid = [iRow ] */static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ sqlite3_stmt *s; int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iRow); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);}/* select rowid, doclist from %_term where term = [zTerm] and first = [iFirst] * If found, returns SQLITE_OK; the caller must free the returned doclist. * If no rows found, returns SQLITE_ERROR. */static int term_select(fulltext_vtab *v, const char *zTerm, int nTerm, sqlite_int64 iFirst, sqlite_int64 *rowid, DocList *out){ sqlite3_stmt *s; int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_TRANSIENT); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 2, iFirst); if( rc!=SQLITE_OK ) return rc; rc = sql_step_statement(v, TERM_SELECT_STMT, &s); if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc; *rowid = sqlite3_column_int64(s, 0); docListInit(out, DL_POSITIONS_OFFSETS, sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain locked. */ rc = sqlite3_step(s); return rc==SQLITE_DONE ? SQLITE_OK : rc;}/* select max(first) from %_term where term = [zTerm] and first <= [iFirst] * If found, returns SQLITE_ROW and result in *piResult; if the query returns * NULL (meaning no row found) returns SQLITE_DONE. 
*/static int term_chunk_select(fulltext_vtab *v, const char *zTerm, int nTerm, sqlite_int64 iFirst, sqlite_int64 *piResult){ sqlite3_stmt *s; int rc = sql_get_statement(v, TERM_CHUNK_SELECT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 2, iFirst); if( rc!=SQLITE_OK ) return rc; rc = sql_step_statement(v, TERM_CHUNK_SELECT_STMT, &s); if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc; switch( sqlite3_column_type(s, 0) ){ case SQLITE_NULL: rc = SQLITE_DONE; break; case SQLITE_INTEGER: *piResult = sqlite3_column_int64(s, 0); break; default: return SQLITE_ERROR; } /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain locked. */ if( sqlite3_step(s) != SQLITE_DONE ) return SQLITE_ERROR; return rc;}/* insert into %_term (term, first, doclist) values ([zTerm], [iFirst], [doclist]) */static int term_insert(fulltext_vtab *v, const char *zTerm, int nTerm, sqlite_int64 iFirst, DocList *doclist){ sqlite3_stmt *s; int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 2, iFirst); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, TERM_INSERT_STMT, &s);}/* update %_term set doclist = [doclist] where rowid = [rowid] */static int term_update(fulltext_vtab *v, sqlite_int64 rowid, DocList *doclist){ sqlite3_stmt *s; int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 2, rowid); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, 
TERM_UPDATE_STMT, &s);}static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ sqlite3_stmt *s; int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, rowid); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, TERM_DELETE_STMT, &s);}static void fulltext_vtab_destroy(fulltext_vtab *v){ int iStmt; for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){ if( v->pFulltextStatements[iStmt]!=NULL ){ sqlite3_finalize(v->pFulltextStatements[iStmt]); v->pFulltextStatements[iStmt] = NULL; } } if( v->pTokenizer!=NULL ){ v->pTokenizer->pModule->xDestroy(v->pTokenizer); v->pTokenizer = NULL; } free((void *) v->zName); free(v);}/* Current interface:** argv[0] - module name** argv[1] - database name** argv[2] - table name** argv[3] - tokenizer name (optional, a sensible default is provided)** argv[4..] - passed to tokenizer (optional based on tokenizer)**/static int fulltextConnect(sqlite3 *db, void *pAux, int argc, char **argv, sqlite3_vtab **ppVTab){ int rc; fulltext_vtab *v; sqlite3_tokenizer_module *m = NULL; assert( argc>=3 ); v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); /* sqlite will initialize v->base */ v->db = db; v->zName = string_dup(argv[2]); v->pTokenizer = NULL; if( argc==3 ){ get_simple_tokenizer_module(&m); } else { /* TODO(shess) For now, add new tokenizers as else if clauses. */ if( !strcmp(argv[3], "simple") ){ get_simple_tokenizer_module(&m); } else { assert( "unrecognized tokenizer"==NULL ); } } /* TODO(shess) Since tokenization impacts the index, the parameters ** to the tokenizer need to be identical when a persistent virtual ** table is re-created. One solution would be a meta-table to track ** such information in the database. Then we could verify that the ** information is identical on subsequent creates. */ /* TODO(shess) Why isn't argv already (const char **)? 
*/ rc = m->xCreate(argc-3, (const char **) (argv+3), &v->pTokenizer); if( rc!=SQLITE_OK ) return rc; v->pTokenizer->pModule = m; /* TODO: verify the existence of backing tables foo_content, foo_term */ rc = sqlite3_declare_vtab(db, "create table x(content text)"); if( rc!=SQLITE_OK ) return rc; memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); *ppVTab = &v->base; return SQLITE_OK;}static int fulltextCreate(sqlite3 *db, void *pAux, int argc, char **argv, sqlite3_vtab **ppVTab){ int rc; assert( argc>=3 ); /* The %_content table holds the text of each full-text item, with ** the rowid used as the docid. ** ** The %_term table maps each term to a document list blob ** containing elements sorted by ascending docid, each element ** encoded as: ** ** docid varint-encoded ** token count varint-encoded ** "count" token elements (poslist): ** position varint-encoded as delta from previous position ** start offset varint-encoded as delta from previous start offset ** end offset varint-encoded as delta from start offset ** ** Additionally, doclist blobs can be chunked into multiple rows, ** using "first" to order the blobs. "first" is simply the first ** docid in the blob. */ /* ** NOTE(shess) That last sentence is incorrect in the face of ** deletion, which can leave a doclist that doesn't contain the ** first from that row. I _believe_ this does not matter to the ** operation of the system, but it might be reasonable to update ** appropriately in case this assumption becomes more important. */ rc = sql_exec(db, argv[2], "create table %_content(content text);" "create table %_term(term text, first integer, doclist blob);" "create index %_index on %_term(term, first)"); if( rc!=SQLITE_OK ) return rc; return fulltextConnect(db, pAux, argc, argv, ppVTab);}/* Decide how to handle an SQL query. * At the moment, MATCH queries can include implicit boolean ANDs; we * haven't implemented phrase searches or OR yet. 
*/static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ int i; for(i=0; i<pInfo->nConstraint; ++i){ const struct sqlite3_index_constraint *pConstraint; pConstraint = &pInfo->aConstraint[i]; if( pConstraint->iColumn==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH && pConstraint->usable ){ /* a full-text search */ pInfo->aConstraintUsage[i].argvIndex = 1; pInfo->aConstraintUsage[i].omit = 1; pInfo->idxNum = QUERY_FULLTEXT; pInfo->estimatedCost = 1.0; /* an arbitrary value for now */ return SQLITE_OK; } } pInfo->idxNum = QUERY_GENERIC; return SQLITE_OK;}static int fulltextDisconnect(sqlite3_vtab *pVTab){ fulltext_vtab_destroy((fulltext_vtab *)pVTab); return SQLITE_OK;}static int fulltextDestroy(sqlite3_vtab *pVTab){ fulltext_vtab *v = (fulltext_vtab *)pVTab; int rc = sql_exec(v->db, v->zName, "drop table %_content; drop table %_term"); if( rc!=SQLITE_OK ) return rc; fulltext_vtab_destroy((fulltext_vtab *)pVTab); return SQLITE_OK;}static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ fulltext_cursor *c; c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); /* sqlite will initialize c->base */ *ppCursor = &c->base; return SQLITE_OK;}static int fulltextClose(sqlite3_vtab_cursor *pCursor){ fulltext_cursor *c = (fulltext_cursor *) pCursor; sqlite3_finalize(c->pStmt); if( c->result.pDoclist!=NULL ){ docListDelete(c->result.pDoclist); } free(c); return SQLITE_OK;}
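/*
** Code-viewer UI text (not part of fulltext.c) -- keyboard shortcuts:
**   Copy code             Ctrl + C
**   Search code           Ctrl + F
**   Full-screen mode      F11
**   Toggle theme          Ctrl + Shift + D
**   Show shortcuts        ?
**   Increase font size    Ctrl + =
**   Decrease font size    Ctrl + -
*/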