📄 fts1.c
字号:
*/static char *firstToken(char *zIn, char **pzTail){ int n, ttype; while(1){ n = getToken(zIn, &ttype); if( ttype==TOKEN_SPACE ){ zIn += n; }else if( ttype==TOKEN_EOF ){ *pzTail = zIn; return 0; }else{ zIn[n] = 0; *pzTail = &zIn[1]; dequoteString(zIn); return zIn; } } /*NOTREACHED*/}/* Return true if...**** * s begins with the string t, ignoring case** * s is longer than t** * The first character of s beyond t is not a alphanumeric** ** Ignore leading space in *s.**** To put it another way, return true if the first token of** s[] is t[].*/static int startsWith(const char *s, const char *t){ while( safe_isspace(*s) ){ s++; } while( *t ){ if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; } return *s!='_' && !safe_isalnum(*s);}/*** An instance of this structure defines the "spec" of a** full text index. This structure is populated by parseSpec** and use by fulltextConnect and fulltextCreate.*/typedef struct TableSpec { const char *zDb; /* Logical database name */ const char *zName; /* Name of the full-text index */ int nColumn; /* Number of columns to be indexed */ char **azColumn; /* Original names of columns to be indexed */ char **azContentColumn; /* Column names for %_content */ char **azTokenizer; /* Name of tokenizer and its arguments */} TableSpec;/*** Reclaim all of the memory used by a TableSpec*/static void clearTableSpec(TableSpec *p) { free(p->azColumn); free(p->azContentColumn); free(p->azTokenizer);}/* Parse a CREATE VIRTUAL TABLE statement, which looks like this: * * CREATE VIRTUAL TABLE email * USING fts1(subject, body, tokenize mytokenizer(myarg)) * * We return parsed information in a TableSpec structure. * */static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, char**pzErr){ int i, n; char *z, *zDummy; char **azArg; const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */ assert( argc>=3 ); /* Current interface: ** argv[0] - module name ** argv[1] - database name ** argv[2] - table name ** argv[3..] - columns, optionally followed by tokenizer specification ** and snippet delimiters specification. */ /* Make a copy of the complete argv[][] array in a single allocation. ** The argv[][] array is read-only and transient. We can write to the ** copy in order to modify things and the copy is persistent. */ memset(pSpec, 0, sizeof(*pSpec)); for(i=n=0; i<argc; i++){ n += strlen(argv[i]) + 1; } azArg = malloc( sizeof(char*)*argc + n ); if( azArg==0 ){ return SQLITE_NOMEM; } z = (char*)&azArg[argc]; for(i=0; i<argc; i++){ azArg[i] = z; strcpy(z, argv[i]); z += strlen(z)+1; } /* Identify the column names and the tokenizer and delimiter arguments ** in the argv[][] array. */ pSpec->zDb = azArg[1]; pSpec->zName = azArg[2]; pSpec->nColumn = 0; pSpec->azColumn = azArg; zTokenizer = "tokenize simple"; for(i=3; i<argc; ++i){ if( startsWith(azArg[i],"tokenize") ){ zTokenizer = azArg[i]; }else{ z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy); pSpec->nColumn++; } } if( pSpec->nColumn==0 ){ azArg[0] = "content"; pSpec->nColumn = 1; } /* ** Construct the list of content column names. ** ** Each content column name will be of the form cNNAAAA ** where NN is the column number and AAAA is the sanitized ** column name. "sanitized" means that special characters are ** converted to "_". The cNN prefix guarantees that all column ** names are unique. ** ** The AAAA suffix is not strictly necessary. It is included ** for the convenience of people who might examine the generated ** %_content table and wonder what the columns are used for. */ pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) ); if( pSpec->azContentColumn==0 ){ clearTableSpec(pSpec); return SQLITE_NOMEM; } for(i=0; i<pSpec->nColumn; i++){ char *p; pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); for (p = pSpec->azContentColumn[i]; *p ; ++p) { if( !safe_isalnum(*p) ) *p = '_'; } } /* ** Parse the tokenizer specification string. */ pSpec->azTokenizer = tokenizeString(zTokenizer, &n); tokenListToIdList(pSpec->azTokenizer); return SQLITE_OK;}/*** Generate a CREATE TABLE statement that describes the schema of** the virtual table. Return a pointer to this schema string.**** Space is obtained from sqlite3_mprintf() and should be freed** using sqlite3_free().*/static char *fulltextSchema( int nColumn, /* Number of columns */ const char *const* azColumn, /* List of columns */ const char *zTableName /* Name of the table */){ int i; char *zSchema, *zNext; const char *zSep = "("; zSchema = sqlite3_mprintf("CREATE TABLE x"); for(i=0; i<nColumn; i++){ zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]); sqlite3_free(zSchema); zSchema = zNext; zSep = ","; } zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName); sqlite3_free(zSchema); return zNext;}/*** Build a new sqlite3_vtab structure that will describe the** fulltext index defined by spec.*/static int constructVtab( sqlite3 *db, /* The SQLite database connection */ TableSpec *spec, /* Parsed spec information from parseSpec() */ sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ char **pzErr /* Write any error message here */){ int rc; int n; fulltext_vtab *v = 0; const sqlite3_tokenizer_module *m = NULL; char *schema; v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); if( v==0 ) return SQLITE_NOMEM; memset(v, 0, sizeof(*v)); /* sqlite will initialize v->base */ v->db = db; v->zDb = spec->zDb; /* Freed when azColumn is freed */ v->zName = spec->zName; /* Freed when azColumn is freed */ v->nColumn = spec->nColumn; v->azContentColumn = spec->azContentColumn; spec->azContentColumn = 0; v->azColumn = spec->azColumn; spec->azColumn = 0; if( spec->azTokenizer==0 ){ return SQLITE_NOMEM; } /* TODO(shess) For now, add new tokenizers as else if clauses. */ if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ sqlite3Fts1SimpleTokenizerModule(&m); }else if( startsWith(spec->azTokenizer[0], "porter") ){ sqlite3Fts1PorterTokenizerModule(&m); }else{ *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); rc = SQLITE_ERROR; goto err; } for(n=0; spec->azTokenizer[n]; n++){} if( n ){ rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], &v->pTokenizer); }else{ rc = m->xCreate(0, 0, &v->pTokenizer); } if( rc!=SQLITE_OK ) goto err; v->pTokenizer->pModule = m; /* TODO: verify the existence of backing tables foo_content, foo_term */ schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, spec->zName); rc = sqlite3_declare_vtab(db, schema); sqlite3_free(schema); if( rc!=SQLITE_OK ) goto err; memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); *ppVTab = &v->base; TRACE(("FTS1 Connect %p\n", v)); return rc;err: fulltext_vtab_destroy(v); return rc;}static int fulltextConnect( sqlite3 *db, void *pAux, int argc, const char *const*argv, sqlite3_vtab **ppVTab, char **pzErr){ TableSpec spec; int rc = parseSpec(&spec, argc, argv, pzErr); if( rc!=SQLITE_OK ) return rc; rc = constructVtab(db, &spec, ppVTab, pzErr); clearTableSpec(&spec); return rc;} /* The %_content table holds the text of each document, with ** the rowid used as the docid. ** ** The %_term table maps each term to a document list blob ** containing elements sorted by ascending docid, each element ** encoded as: ** ** docid varint-encoded ** token elements: ** position+1 varint-encoded as delta from previous position ** start offset varint-encoded as delta from previous start offset ** end offset varint-encoded as delta from start offset ** ** The sentinel position of 0 indicates the end of the token list. ** ** Additionally, doclist blobs are chunked into multiple segments, ** using segment to order the segments. New elements are added to ** the segment at segment 0, until it exceeds CHUNK_MAX. Then ** segment 0 is deleted, and the doclist is inserted at segment 1. ** If there is already a doclist at segment 1, the segment 0 doclist ** is merged with it, the segment 1 doclist is deleted, and the ** merged doclist is inserted at segment 2, repeating those ** operations until an insert succeeds. ** ** Since this structure doesn't allow us to update elements in place ** in case of deletion or update, these are simply written to ** segment 0 (with an empty token list in case of deletion), with ** docListAccumulate() taking care to retain lower-segment ** information in preference to higher-segment information. */ /* TODO(shess) Provide a VACUUM type operation which both removes ** deleted elements which are no longer necessary, and duplicated ** elements. I suspect this will probably not be necessary in ** practice, though. */static int fulltextCreate(sqlite3 *db, void *pAux, int argc, const char * const *argv, sqlite3_vtab **ppVTab, char **pzErr){ int rc; TableSpec spec; StringBuffer schema; TRACE(("FTS1 Create\n")); rc = parseSpec(&spec, argc, argv, pzErr); if( rc!=SQLITE_OK ) return rc; initStringBuffer(&schema); append(&schema, "CREATE TABLE %_content("); appendList(&schema, spec.nColumn, spec.azContentColumn); append(&schema, ")"); rc = sql_exec(db, spec.zDb, spec.zName, schema.s); free(schema.s); if( rc!=SQLITE_OK ) goto out; rc = sql_exec(db, spec.zDb, spec.zName, "create table %_term(term text, segment integer, doclist blob, " "primary key(term, segment));"); if( rc!=SQLITE_OK ) goto out; rc = constructVtab(db, &spec, ppVTab, pzErr);out: clearTableSpec(&spec); return rc;}/* Decide how to handle an SQL query. */static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ int i; TRACE(("FTS1 BestIndex\n")); for(i=0; i<pInfo->nConstraint; ++i){ const struct sqlite3_index_constraint *pConstraint; pConstraint = &pInfo->aConstraint[i]; if( pConstraint->usable ) { if( pConstraint->iColumn==-1 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */ TRACE(("FTS1 QUERY_ROWID\n")); } else if( pConstraint->iColumn>=0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ /* full-text search */ pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); } else continue; pInfo->aConstraintUsage[i].argvIndex = 1; pInfo->aConstraintUsage[i].omit = 1; /* An arbitrary value for now. * TODO: Perhaps rowid matches should be considered cheaper than * full-text searches. */ pInfo->estimatedCost = 1.0; return SQLITE_OK; } } pInfo->idxNum = QUERY_GENERIC; return SQLITE_OK;}static int fulltextDisconnect(sqlite3_vtab *pVTab){ TRACE(("FTS1 Disconnect %p\n", pVTab)); fulltext_vtab_destroy((fulltext_vtab *)pVTab); return SQLITE_OK;}static int fulltextDestroy(sqlite3_vtab *pVTab){ fulltext_vtab *v = (fulltext_vtab *)pVTab; int rc; TRACE(("FTS1 Destroy %p\n", pVTab)); rc = sql_exec(v->db, v->zDb, v->zName, "drop table if exists %_content;" "drop table if exists %_term;" ); if( rc!=SQLITE_OK ) return rc; fulltext_vtab_destroy((fulltext_vtab *)pVTab); return SQLITE_OK;}static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ fulltext_cursor *c; c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); /* sqlite will initialize c->base */ *ppCursor = &c->base; TRACE(("FTS1 Open %p: %p\n", pVTab, c)); return SQLITE_OK;}/* Free all of the dynamically allocated memory held by *q*/static void queryClear(Query *q){ int i; for(i = 0; i < q->nTerms; ++i){ free(q->pTerms[i].pTerm); } free(q->pTerms); memset(q, 0, sizeof(*q));}/* Free all of the dynamically allocated memory held by the** Snippet*/static void snippetClear(Snippet *p){ free(p->aMatch); free(p->zOffset); free(p->zSnippet); memset(p, 0, sizeof(*p));}/*** Append a sing
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -