📄 fulltext.c
/* Advance the cursor to the next row of the current scan. */
static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  sqlite_int64 iDocid;
  int rc;

  switch( c->iCursorType ){
    case QUERY_GENERIC:
      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
      rc = sqlite3_step(c->pStmt);
      switch( rc ){
        case SQLITE_ROW:
          c->eof = 0;
          return SQLITE_OK;
        case SQLITE_DONE:
          c->eof = 1;
          return SQLITE_OK;
        default:
          c->eof = 1;
          return rc;
      }
    case QUERY_FULLTEXT:
      rc = sqlite3_reset(c->pStmt);
      if( rc!=SQLITE_OK ) return rc;

      if( readerAtEnd(&c->result)){
        c->eof = 1;
        return SQLITE_OK;
      }
      iDocid = readDocid(&c->result);
      rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
      if( rc!=SQLITE_OK ) return rc;
      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
      rc = sqlite3_step(c->pStmt);
      if( rc==SQLITE_ROW ){   /* the case we expect */
        c->eof = 0;
        return SQLITE_OK;
      }
      /* an error occurred; abort */
      return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
    default:
      assert( 0 );
      return SQLITE_ERROR;  /* not reached */
  }
}

/* Fetch the doclist chunks stored for [pTerm] in the %_term table.
 * (*pStmt) caches the prepared statement across calls; the caller passes
 * NULL the first time. */
static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm,
                               sqlite3_stmt **ppStmt){
  int rc;
  if( *ppStmt ){
    rc = sqlite3_reset(*ppStmt);
  } else {
    rc = sql_prepare(v->db, v->zName, ppStmt,
        "select doclist from %_term where term = ? order by first");
  }
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT);
  if( rc!=SQLITE_OK ) return rc;

  return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */
}

/* Read the posting list for [zTerm]; AND it with the doclist [in] to
 * produce the doclist [out], using the given offset [iOffset] for phrase
 * matching.
 * (*pSelect) is used to hold an SQLite statement used inside this function;
 * the caller should initialize *pSelect to NULL before the first call.
 */
static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect,
                       const char *zTerm,
                       DocList *pIn, int iOffset, DocList *out){
  int rc;
  DocListMerge merge;

  if( pIn!=NULL && !pIn->nData ){
    /* If [pIn] is already empty, there's no point in reading the
     * posting list to AND it in; return immediately. */
    return SQLITE_OK;
  }

  rc = term_select_doclist(v, zTerm, -1, pSelect);
  if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;

  mergeInit(&merge, pIn, iOffset, out);
  while( rc==SQLITE_ROW ){
    DocList block;
    docListInit(&block, DL_POSITIONS_OFFSETS,
                sqlite3_column_blob(*pSelect, 0),
                sqlite3_column_bytes(*pSelect, 0));
    mergeBlock(&merge, &block);
    docListDestroy(&block);

    rc = sqlite3_step(*pSelect);
    if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
      return rc;
    }
  }

  return SQLITE_OK;
}

typedef struct QueryTerm {
  int is_phrase;        /* true if this term begins a new phrase */
  const char *zTerm;
} QueryTerm;

/* A parsed query.
 *
 * As an example, parsing the query ["four score" years "new nation"] will
 * yield a Query with 5 terms:
 *   "four", is_phrase = 1
 *   "score", is_phrase = 0
 *   "years", is_phrase = 1
 *   "new", is_phrase = 1
 *   "nation", is_phrase = 0
 */
typedef struct Query {
  int nTerms;
  QueryTerm *pTerm;
} Query;

/* Append a term to [q], growing the term array as needed.  [q] takes
 * ownership of [zTerm]. */
static void query_add(Query *q, int is_phrase, const char *zTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0]));
  t = &q->pTerm[q->nTerms - 1];
  t->is_phrase = is_phrase;
  t->zTerm = zTerm;
}

/* Release the term strings and the term array owned by [q]. */
static void query_free(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free((void *) q->pTerm[i].zTerm);
  }
  free(q->pTerm);
}

/* Tokenize one segment of the query string (either inside or outside a
 * quoted phrase) and append each token to [pQuery]. */
static int tokenize_segment(sqlite3_tokenizer *pTokenizer,
                            const char *zQuery, int in_phrase,
                            Query *pQuery){
  sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int is_first = 1;

  int rc = pModule->xOpen(pTokenizer, zQuery, -1, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *zToken;
    int nToken, iStartOffset, iEndOffset, dummy_pos;

    rc = pModule->xNext(pCursor,
                        &zToken, &nToken,
                        &iStartOffset, &iEndOffset,
                        &dummy_pos);
    if( rc!=SQLITE_OK ) break;

    query_add(pQuery, !in_phrase || is_first, string_dup_n(zToken, nToken));
    is_first = 0;
  }

  return pModule->xClose(pCursor);
}

/* Parse a query string, yielding a Query object. */
static int parse_query(fulltext_vtab *v, const char *zQuery, Query *pQuery){
  char *zQuery1 = string_dup(zQuery);
  int in_phrase = 0;
  char *s = zQuery1;
  pQuery->nTerms = 0;
  pQuery->pTerm = NULL;

  while( *s ){
    char *t = s;
    while( *t ){
      if( *t=='"' ){
        *t++ = '\0';
        break;
      }
      ++t;
    }
    if( *s ){
      tokenize_segment(v->pTokenizer, s, in_phrase, pQuery);
    }
    s = t;
    in_phrase = !in_phrase;
  }

  free(zQuery1);
  return SQLITE_OK;
}

/* Perform a full-text query; return a list of documents in [pResult]. */
static int fulltext_query(fulltext_vtab *v, const char *zQuery,
                          DocList **pResult){
  Query q;
  int phrase_start = -1;
  int i;
  sqlite3_stmt *pSelect = NULL;
  DocList *d = NULL;

  int rc = parse_query(v, zQuery, &q);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge terms. */
  for(i = 0 ; i < q.nTerms ; ++i){
    /* In each merge step, we need to generate positions whenever we're
     * processing a phrase which hasn't ended yet. */
    int need_positions = i<q.nTerms-1 && !q.pTerm[i+1].is_phrase;
    DocList *next = docListNew(need_positions ?
                               DL_POSITIONS : DL_DOCIDS);
    if( q.pTerm[i].is_phrase ){
      phrase_start = i;
    }
    rc = query_merge(v, &pSelect, q.pTerm[i].zTerm, d, i - phrase_start, next);
    if( rc!=SQLITE_OK ) break;
    if( d!=NULL ){
      docListDelete(d);
    }
    d = next;
  }

  sqlite3_finalize(pSelect);
  query_free(&q);
  *pResult = d;
  return rc;
}

/* Begin a scan: either a full scan of %_content or a full-text search,
 * depending on the index number chosen by xBestIndex. */
static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
                          int idxNum, const char *idxStr,
                          int argc, sqlite3_value **argv){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  const char *zStatement;

  c->iCursorType = idxNum;
  switch( idxNum ){
    case QUERY_GENERIC:
      zStatement = "select rowid, content from %_content";
      break;

    case QUERY_FULLTEXT:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( argc==1 );
      rc = fulltext_query(v, zQuery, &pResult);
      if( rc!=SQLITE_OK ) return rc;
      readerInit(&c->result, pResult);
      zStatement = "select rowid, content from %_content where rowid = ?";
      break;
    }

    default:
      assert( 0 );
  }

  rc = sql_prepare(v->db, v->zName, &c->pStmt, zStatement);
  if( rc!=SQLITE_OK ) return rc;

  return fulltextNext(pCursor);
}

/* Return true if the cursor has been moved past the last row. */
static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  return c->eof;
}

/* Report the value of the single (content) column for the current row. */
static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
                          sqlite3_context *pContext, int idxCol){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  const char *s;

  assert( idxCol==0 );
  s = (const char *) sqlite3_column_text(c->pStmt, 1);
  sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT);

  return SQLITE_OK;
}

/* Report the rowid of the current row. */
static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

/* Build a hash table containing all terms in zText. */
static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer,
                       const char *zText, sqlite_int64 iDocid){
  sqlite3_tokenizer_cursor *pCursor;
  const char *pToken;
  int nTokenBytes;
  int iStartOffset, iEndOffset, iPosition;

  int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  HashInit(terms, HASH_STRING, 1);
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){
    DocList *p;

    /* Positions can't be negative; we use -1 as a terminator internally. */
    if( iPosition<0 ) {
      rc = SQLITE_ERROR;
      goto err;
    }

    p = HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_POSITIONS_OFFSETS);
      docListAddDocid(p, iDocid);
      HashInsert(terms, pToken, nTokenBytes, p);
    }
    docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset);
  }

err:
  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*
  */
  pTokenizer->pModule->xClose(pCursor);
  return rc;
}

/* Update the %_terms table to map the term [zTerm] to the given rowid.
 */
static int index_insert_term(fulltext_vtab *v, const char *zTerm, int nTerm,
                             sqlite_int64 iDocid, DocList *p){
  sqlite_int64 iFirst;
  sqlite_int64 iIndexRow;
  DocList doclist;

  int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
  if( rc==SQLITE_DONE ){
    docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
    if( docListUpdate(&doclist, iDocid, p) ){
      rc = term_insert(v, zTerm, nTerm, iDocid, &doclist);
      docListDestroy(&doclist);
      return rc;
    }
    return SQLITE_OK;
  }
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;

  /* This word is in the index; add this document ID to its blob. */
  rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
  if( rc!=SQLITE_OK ) return rc;

  if( docListUpdate(&doclist, iDocid, p) ){
    /* If the blob is too big, split it in half. */
    if( doclist.nData>CHUNK_MAX ){
      DocList half;
      if( docListSplit(&doclist, &half) ){
        rc = term_insert(v, zTerm, nTerm, firstDocid(&half), &half);
        docListDestroy(&half);
        if( rc!=SQLITE_OK ) goto err;
      }
    }
    rc = term_update(v, iIndexRow, &doclist);
  }

err:
  docListDestroy(&doclist);
  return rc;
}

/* Insert a row into the full-text index; set *piRowid to be the ID of the
 * new row. */
static int index_insert(fulltext_vtab *v,
                        sqlite3_value *pRequestRowid, const char *zText,
                        sqlite_int64 *piRowid){
  Hash terms;   /* maps term string -> PosList */
  HashElem *e;

  int rc = content_insert(v, pRequestRowid, zText, -1);
  if( rc!=SQLITE_OK ) return rc;
  *piRowid = sqlite3_last_insert_rowid(v->db);

  if( !zText ) return SQLITE_OK;   /* nothing to index */

  rc = build_terms(&terms, v->pTokenizer, zText, *piRowid);
  if( rc!=SQLITE_OK ) return rc;

  for(e=HashFirst(&terms); e; e=HashNext(e)){
    DocList *p = HashData(e);
    rc = index_insert_term(v, HashKey(e), HashKeysize(e), *piRowid, p);
    if( rc!=SQLITE_OK ) break;
  }

  for(e=HashFirst(&terms); e; e=HashNext(e)){
    DocList *p = HashData(e);
    docListDelete(p);
  }
  HashClear(&terms);
  return rc;
}

static int index_delete_term(fulltext_vtab *v, const char *zTerm, int nTerm,
                             sqlite_int64 iDocid){
  sqlite_int64 iFirst;
  sqlite_int64 iIndexRow;
  DocList doclist;

  int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;

  rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
  if( rc!=SQLITE_OK ) return rc;

  if( docListUpdate(&doclist, iDocid, NULL) ){
    if( doclist.nData>0 ){
      rc = term_update(v, iIndexRow, &doclist);
    } else {  /* empty posting list */
      rc = term_delete(v, iIndexRow);
    }
  }
  docListDestroy(&doclist);
  return rc;
}

/* Delete a row from the full-text index.
 */
static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
  char *zText;
  Hash terms;
  HashElem *e;

  int rc = content_select(v, iRow, &zText);
  if( rc!=SQLITE_OK ) return rc;

  rc = build_terms(&terms, v->pTokenizer, zText, iRow);
  free(zText);
  if( rc!=SQLITE_OK ) return rc;

  for(e=HashFirst(&terms); e; e=HashNext(e)){
    rc = index_delete_term(v, HashKey(e), HashKeysize(e), iRow);
    if( rc!=SQLITE_OK ) break;
  }
  for(e=HashFirst(&terms); e; e=HashNext(e)){
    DocList *p = HashData(e);
    docListDelete(p);
  }
  HashClear(&terms);

  return content_delete(v, iRow);
}

/* xUpdate method: handles DELETE and INSERT; updating an existing row is
 * not yet supported. */
static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                          sqlite_int64 *pRowid){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;

  if( nArg<2 ){
    return index_delete(v, sqlite3_value_int64(ppArg[0]));
  }

  if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    return SQLITE_ERROR;   /* an update; not yet supported */
  }

  assert( nArg==3 );   /* ppArg[1] = rowid, ppArg[2] = content */
  return index_insert(v, ppArg[1],
                      (const char *)sqlite3_value_text(ppArg[2]), pRowid);
}

static sqlite3_module fulltextModule = {
  0,
  fulltextCreate,
  fulltextConnect,
  fulltextBestIndex,
  fulltextDisconnect,
  fulltextDestroy,
  fulltextOpen,
  fulltextClose,
  fulltextFilter,
  fulltextNext,
  fulltextEof,
  fulltextColumn,
  fulltextRowid,
  fulltextUpdate
};

/* Register the "fulltext" module with the given database connection. */
int fulltext_init(sqlite3 *db){
  return sqlite3_create_module(db, "fulltext", &fulltextModule, 0);
}

#if !SQLITE_CORE
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
                           const sqlite3_api_routines *pApi){
  SQLITE_EXTENSION_INIT2(pApi)
  return fulltext_init(db);
}
#endif
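
For reference, below is a minimal usage sketch of the module registered above. Only fulltext_init() and the single content column are taken from the code shown here; fulltextCreate() and fulltextBestIndex() are not part of this excerpt, so the CREATE VIRTUAL TABLE argument list and the MATCH operator used in the SELECT are assumptions, and the table name, the driver file name, and the helper functions (run, print_row) are purely illustrative. The query text mirrors the phrase example from the Query comment.

/* usage_example.c -- hypothetical driver, not part of fulltext.c */
#include <stdio.h>
#include "sqlite3.h"

int fulltext_init(sqlite3 *db);   /* from fulltext.c above */

/* Print one result row of the SELECT below (rowid, content). */
static int print_row(void *pArg, int nCol, char **azVal, char **azCol){
  (void)pArg; (void)nCol; (void)azCol;
  printf("rowid=%s content=%s\n", azVal[0], azVal[1]);
  return 0;
}

/* Run one SQL statement and report any error message. */
static int run(sqlite3 *db, const char *zSql){
  char *zErr = 0;
  int rc = sqlite3_exec(db, zSql, print_row, 0, &zErr);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "error: %s\n", zErr ? zErr : "unknown");
    sqlite3_free(zErr);
  }
  return rc;
}

int main(void){
  sqlite3 *db;
  if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;
  if( fulltext_init(db)!=SQLITE_OK ) return 1;   /* register the module */

  /* Assumed syntax: check fulltextCreate() for the real argument list
  ** and fulltextBestIndex() for the operator that maps to QUERY_FULLTEXT. */
  run(db, "CREATE VIRTUAL TABLE pages USING fulltext");
  run(db, "INSERT INTO pages(content) VALUES('four score and seven years ago')");
  run(db, "SELECT rowid, content FROM pages WHERE content MATCH '\"four score\" years'");

  sqlite3_close(db);
  return 0;
}

To try it, compile this driver together with fulltext.c (and whatever tokenizer and doclist sources that file depends on) and link against SQLite.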