📄 fts1.c
字号:
** output: [ ] ...
*/
void tokenListToIdList(char **azIn){
  int i, j;
  if( azIn==0 ) return;

  /* j is the next output slot.  It starts at -1 so that the first token
  ** that is kept is dequoted but never stored: that token is dropped and
  ** every later kept token is shifted down over it.
  */
  for(i=0, j=-1; azIn[i]; i++){
    unsigned char c0 = (unsigned char)azIn[i][0];
    /* Keep a token when it starts with an alphanumeric character or is
    ** longer than one character.  An empty token is skipped without
    ** looking at the byte after its terminator.  The unsigned char
    ** conversion keeps isalnum() defined for bytes >= 0x80.
    */
    if( c0!=0 && (isalnum(c0) || azIn[i][1]) ){
      dequoteString(azIn[i]);
      if( j>=0 ){
        azIn[j] = azIn[i];
      }
      j++;
    }
  }

  /* If nothing was kept j is still -1; terminate the list at slot 0
  ** instead of writing in front of the array.
  */
  azIn[j<0 ? 0 : j] = 0;
}
/*
** Skip over leading whitespace tokens in zIn and return the first token
** that is not whitespace.  The token is null-terminated in place and
** passed through dequoteString() before being returned.
**
** If the end of input is reached first, *pzTail is set to the current
** position and 0 is returned.
**
** NOTE(review): on success *pzTail is set to &zIn[1], i.e. one byte past
** the START of the token rather than past its end.  The only caller
** visible in this file (parseSpec) ignores the tail, so this is left
** as-is - confirm the intent before relying on *pzTail.
*/
static char *firstToken(char *zIn, char **pzTail){
  int nTok;      /* Length of the current token as reported by getToken() */
  int eType;     /* Type of the current token */

  for(;;){
    nTok = getToken(zIn, &eType);
    if( eType==TOKEN_SPACE ){
      zIn += nTok;
      continue;
    }
    if( eType==TOKEN_EOF ){
      *pzTail = zIn;
      return 0;
    }
    zIn[nTok] = 0;
    *pzTail = &zIn[1];
    dequoteString(zIn);
    return zIn;
  }
}
/* Return true if...
**
**   *  s begins with the string t, ignoring case
**   *  the character of s that follows t (which may be the terminating
**      NUL) is neither an alphanumeric nor '_'
**
** Ignore leading space in *s.
**
** To put it another way, return true if the first token of
** s[] is t[].
**
** Every character is converted to unsigned char before it is handed to
** a <ctype.h> function; passing a negative char value to those functions
** is undefined behavior.
*/
static int startsWith(const char *s, const char *t){
  while( isspace((unsigned char)*s) ){ s++; }
  while( *t ){
    /* If s ends early, its NUL differs from *t and we return 0 here,
    ** so s is never read past its terminator. */
    if( tolower((unsigned char)*s++)!=tolower((unsigned char)*t++) ) return 0;
  }
  return *s!='_' && !isalnum((unsigned char)*s);
}
/*
** The parsed "spec" of a full-text index.  parseSpec() fills one of
** these in; fulltextConnect() and fulltextCreate() consume it.
*/
typedef struct TableSpec {
  /* Name of the full-text index */
  const char *zName;
  /* Number of columns to be indexed */
  int nColumn;
  /* Original names of columns to be indexed */
  char **azColumn;
  /* Column names for %_content */
  char **azContentColumn;
  /* Name of tokenizer and its arguments */
  char **azTokenizer;
} TableSpec;
/*
** Reclaim all of the memory used by a TableSpec.
**
** The azColumn, azContentColumn and azTokenizer arrays are released with
** free().  The individual strings in azContentColumn[] are created by
** sqlite3_mprintf() in parseSpec(), so each one is released with
** sqlite3_free() before the array itself is freed; freeing only the array
** would leak them.
**
** constructVtab() sets azColumn and azContentColumn to 0 once it has
** taken ownership of them, in which case there is nothing to release
** for those members here.  The TableSpec structure itself is not freed.
*/
void clearTableSpec(TableSpec *p) {
  free(p->azColumn);
  if( p->azContentColumn ){
    int i;
    for(i=0; i<p->nColumn; i++){
      sqlite3_free(p->azContentColumn[i]);
    }
    free(p->azContentColumn);
  }
  free(p->azTokenizer);
}
/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
 *
 *   CREATE VIRTUAL TABLE email
 *        USING fts1(subject, body, tokenize mytokenizer(myarg))
 *
 * We return parsed information in a TableSpec structure.
 *
 * Returns SQLITE_OK on success or SQLITE_NOMEM if an allocation made here
 * fails.  On SQLITE_NOMEM everything allocated by this routine has been
 * released and *pSpec must not be used.  On SQLITE_OK the caller releases
 * the spec with clearTableSpec().  pSpec->azTokenizer may still be 0 on
 * SQLITE_OK if tokenizeString() returned 0.
 *
 * pzErr is not written by this routine.
 */
int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, char**pzErr){
  int i, n;
  size_t nByte;                 /* Bytes of string data in argv[][] */
  char *z, *zDummy;
  char **azArg;
  const char *zTokenizer = 0;   /* argv[] entry describing the tokenizer */

  assert( argc>=3 );
  /* Current interface:
  ** argv[0] - module name
  ** argv[1] - database name
  ** argv[2] - table name
  ** argv[3..] - columns, optionally followed by tokenizer specification
  **             and snippet delimiters specification.
  */

  /* Make a copy of the complete argv[][] array in a single allocation.
  ** The argv[][] array is read-only and transient.  We can write to the
  ** copy in order to modify things and the copy is persistent.
  **
  ** Layout of the allocation: argc pointers followed by the string bytes.
  */
  memset(pSpec, 0, sizeof(*pSpec));
  nByte = 0;
  for(i=0; i<argc; i++){
    nByte += strlen(argv[i]) + 1;
  }
  azArg = malloc( sizeof(char*)*argc + nByte );
  if( azArg==0 ){
    return SQLITE_NOMEM;
  }
  z = (char*)&azArg[argc];
  for(i=0; i<argc; i++){
    azArg[i] = z;
    strcpy(z, argv[i]);
    z += strlen(z)+1;
  }

  /* Identify the column names and the tokenizer and delimiter arguments
  ** in the argv[][] array.
  **
  ** Column names are compacted into azArg[0..nColumn-1].  That is safe
  ** because the write index never catches up with the read index (which
  ** starts at 3), and zName keeps its own pointer to the table name
  ** string before slot 2 can be overwritten.
  */
  pSpec->zName = azArg[2];
  pSpec->nColumn = 0;
  pSpec->azColumn = azArg;
  zTokenizer = "tokenize simple";
  for(i=3; i<argc; ++i){
    if( startsWith(azArg[i],"tokenize") ){
      zTokenizer = azArg[i];
    }else{
      azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
      pSpec->nColumn++;
    }
  }
  if( pSpec->nColumn==0 ){
    /* No columns were listed: fall back to a single default column. */
    azArg[0] = "content";
    pSpec->nColumn = 1;
  }

  /*
  ** Construct the list of content column names.
  **
  ** Each content column name will be of the form cNNAAAA
  ** where NN is the column number and AAAA is the sanitized
  ** column name.  "sanitized" means that special characters are
  ** converted to "_".  The cNN prefix guarantees that all column
  ** names are unique.
  **
  ** The AAAA suffix is not strictly necessary.  It is included
  ** for the convenience of people who might examine the generated
  ** %_content table and wonder what the columns are used for.
  */
  pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
  if( pSpec->azContentColumn==0 ){
    clearTableSpec(pSpec);
    return SQLITE_NOMEM;
  }
  for(i=0; i<pSpec->nColumn; i++){
    char *p;
    char *zCol = sqlite3_mprintf("c%d%s", i, azArg[i]);
    if( zCol==0 ){
      /* Out of memory.  Release the names built so far and detach the
      ** partially filled array before handing the rest of the spec to
      ** clearTableSpec(), so that no uninitialized slot is ever touched.
      */
      while( i>0 ){
        sqlite3_free(pSpec->azContentColumn[--i]);
      }
      free(pSpec->azContentColumn);
      pSpec->azContentColumn = 0;
      clearTableSpec(pSpec);
      return SQLITE_NOMEM;
    }
    for(p=zCol; *p; ++p){
      /* unsigned char keeps isalnum() defined for bytes >= 0x80 */
      if( !isalnum((unsigned char)*p) ) *p = '_';
    }
    pSpec->azContentColumn[i] = zCol;
  }

  /*
  ** Parse the tokenizer specification string.
  */
  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  tokenListToIdList(pSpec->azTokenizer);

  return SQLITE_OK;
}
/*
** Build the CREATE TABLE statement that declares the schema of the
** virtual table: one column for each of the nColumn entries in
** azColumn[], followed by one extra column named zTableName.  The
** statement is returned as a freshly allocated string.
**
** Space is obtained from sqlite3_mprintf() and should be freed
** using sqlite3_free().
*/
static char *fulltextSchema(
  int nColumn,                  /* Number of columns */
  const char *const* azColumn,  /* List of columns */
  const char *zTableName        /* Name of the table */
){
  char *zSql;       /* Statement text accumulated so far */
  char *zMore;      /* zSql with the next piece appended */
  int iCol;

  zSql = sqlite3_mprintf("CREATE TABLE x");
  for(iCol=0; iCol<nColumn; iCol++){
    /* "(" opens the column list in front of the first column; every
    ** later column is introduced by ",". */
    zMore = sqlite3_mprintf("%s%s%Q", zSql, iCol==0 ? "(" : ",",
                            azColumn[iCol]);
    sqlite3_free(zSql);
    zSql = zMore;
  }
  zMore = sqlite3_mprintf("%s,%Q)", zSql, zTableName);
  sqlite3_free(zSql);
  return zMore;
}
/*
** Build a new sqlite3_vtab structure that will describe the
** fulltext index defined by spec.
**
** Ownership of spec->azColumn and spec->azContentColumn moves to the new
** vtab as soon as it has been allocated; both spec members are set to 0
** so that the caller's clearTableSpec() does not free them again.
** spec->azTokenizer stays with the caller.
**
** Returns SQLITE_OK and writes *ppVTab on success.  On failure an error
** code is returned, *ppVTab is left untouched, and the partially built
** vtab (including the arrays moved into it) is released through
** fulltext_vtab_destroy().  *pzErr is written only for an unknown
** tokenizer name.
*/
static int constructVtab(
  sqlite3 *db,              /* The SQLite database connection */
  TableSpec *spec,          /* Parsed spec information from parseSpec() */
  sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
  char **pzErr              /* Write any error message here */
){
  int rc;
  int n;
  fulltext_vtab *v = 0;
  const sqlite3_tokenizer_module *m = NULL;
  char *schema;

  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  if( v==0 ) return SQLITE_NOMEM;
  memset(v, 0, sizeof(*v));
  /* sqlite will initialize v->base */
  v->db = db;
  v->zName = spec->zName;   /* Freed when azColumn is freed */
  v->nColumn = spec->nColumn;
  v->azContentColumn = spec->azContentColumn;
  spec->azContentColumn = 0;
  v->azColumn = spec->azColumn;
  spec->azColumn = 0;

  if( spec->azTokenizer==0 ){
    /* v already owns the column arrays, so it must go through the common
    ** cleanup path; returning directly would leak v and both arrays. */
    rc = SQLITE_NOMEM;
    goto err;
  }

  /* TODO(shess) For now, add new tokenizers as else if clauses. */
  if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
    sqlite3Fts1SimpleTokenizerModule(&m);
  }else if( startsWith(spec->azTokenizer[0], "porter") ){
    sqlite3Fts1PorterTokenizerModule(&m);
  }else{
    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
    rc = SQLITE_ERROR;
    goto err;
  }

  /* Count the tokenizer tokens.  Entry 0 is the tokenizer name; any
  ** remaining entries are passed to xCreate as its arguments. */
  for(n=0; spec->azTokenizer[n]; n++){}
  if( n ){
    rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
                    &v->pTokenizer);
  }else{
    rc = m->xCreate(0, 0, &v->pTokenizer);
  }
  if( rc!=SQLITE_OK ) goto err;
  v->pTokenizer->pModule = m;

  /* TODO: verify the existence of backing tables foo_content, foo_term */

  schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
                          spec->zName);
  if( schema==0 ){
    /* fulltextSchema() returns 0 when its final allocation fails; do not
    ** hand a NULL statement to sqlite3_declare_vtab(). */
    rc = SQLITE_NOMEM;
    goto err;
  }
  rc = sqlite3_declare_vtab(db, schema);
  sqlite3_free(schema);
  if( rc!=SQLITE_OK ) goto err;

  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));

  *ppVTab = &v->base;
  TRACE(("FTS1 Connect %p\n", v));

  return rc;

err:
  fulltext_vtab_destroy(v);
  return rc;
}
/*
** Connect to an existing full-text table: parse the module arguments
** into a TableSpec and build the vtab from it.  No SQL is executed here.
** The spec is released again once parsing has succeeded, whether or not
** the vtab could be built.  pAux is unused.
*/
static int fulltextConnect(
  sqlite3 *db,
  void *pAux,
  int argc, const char *const*argv,
  sqlite3_vtab **ppVTab,
  char **pzErr
){
  TableSpec spec;
  int rc;

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc==SQLITE_OK ){
    rc = constructVtab(db, &spec, ppVTab, pzErr);
    clearTableSpec(&spec);
  }
  return rc;
}
/* The %_content table holds the text of each document, with
** the rowid used as the docid.
**
** The %_term table maps each term to a document list blob
** containing elements sorted by ascending docid, each element
** encoded as:
**
** docid varint-encoded
** token elements:
** position+1 varint-encoded as delta from previous position
** start offset varint-encoded as delta from previous start offset
** end offset varint-encoded as delta from start offset
**
** The sentinel position of 0 indicates the end of the token list.
**
** Additionally, doclist blobs are chunked into multiple segments,
** using segment to order the segments. New elements are added to
** the segment at segment 0, until it exceeds CHUNK_MAX. Then
** segment 0 is deleted, and the doclist is inserted at segment 1.
** If there is already a doclist at segment 1, the segment 0 doclist
** is merged with it, the segment 1 doclist is deleted, and the
** merged doclist is inserted at segment 2, repeating those
** operations until an insert succeeds.
**
** Since this structure doesn't allow us to update elements in place
** in case of deletion or update, these are simply written to
** segment 0 (with an empty token list in case of deletion), with
** docListAccumulate() taking care to retain lower-segment
** information in preference to higher-segment information.
*/
/* TODO(shess) Provide a VACUUM type operation which both removes
** deleted elements which are no longer necessary, and duplicated
** elements. I suspect this will probably not be necessary in
** practice, though.
*/
/*
** Create a new full-text table.  After parsing the arguments this runs
** the SQL that creates the %_content table (one column per entry in
** spec.azContentColumn) and the %_term table, then builds the vtab.
** Each step runs only if the previous one returned SQLITE_OK, and the
** parsed spec is released on every path after a successful parse.
** pAux is unused.
*/
static int fulltextCreate(sqlite3 *db, void *pAux,
                          int argc, const char * const *argv,
                          sqlite3_vtab **ppVTab, char **pzErr){
  TableSpec spec;
  StringBuffer schema;
  int rc;

  TRACE(("FTS1 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  /* Assemble and run the CREATE TABLE statement for %_content. */
  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  appendList(&schema, spec.nColumn, spec.azContentColumn);
  append(&schema, ")");
  rc = sql_exec(db, spec.zName, schema.s);
  free(schema.s);

  if( rc==SQLITE_OK ){
    rc = sql_exec(db, spec.zName,
      "create table %_term(term text, segment integer, doclist blob, "
                          "primary key(term, segment));");
  }
  if( rc==SQLITE_OK ){
    rc = constructVtab(db, &spec, ppVTab, pzErr);
  }

  clearTableSpec(&spec);
  return rc;
}
/* Decide how to handle an SQL query.
**
** The constraints are scanned in order and the first usable one that is
** either an equality test on the rowid (iColumn==-1) or a MATCH on a
** real column decides the plan.  That constraint is passed to the filter
** step as argument 1 and marked "omit".  If no constraint qualifies,
** idxNum is set to QUERY_GENERIC.
**
** pVTab is unused.  Always returns SQLITE_OK.
*/
static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  int iCons;

  for(iCons=0; iCons<pInfo->nConstraint; ++iCons){
    const struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[iCons];

    if( !pCons->usable ) continue;

    if( pCons->iColumn==-1 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ ){
      pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
    }else if( pCons->iColumn>=0
           && pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
      /* full-text search; the column number is folded into idxNum */
      pInfo->idxNum = QUERY_FULLTEXT + pCons->iColumn;
    }else{
      continue;
    }

    pInfo->aConstraintUsage[iCons].argvIndex = 1;
    pInfo->aConstraintUsage[iCons].omit = 1;

    /* An arbitrary value for now.
     * TODO: Perhaps rowid matches should be considered cheaper than
     * full-text searches. */
    pInfo->estimatedCost = 1.0;

    return SQLITE_OK;
  }

  /* Only the generic path reaches the trace call below. */
  pInfo->idxNum = QUERY_GENERIC;
  TRACE(("FTS1 BestIndex\n"));
  return SQLITE_OK;
}
/*
** Disconnect from the virtual table: hand the vtab to
** fulltext_vtab_destroy().  No SQL is executed here.  Always returns
** SQLITE_OK.
*/
static int fulltextDisconnect(sqlite3_vtab *pVTab){
  fulltext_vtab *v = (fulltext_vtab *)pVTab;

  TRACE(("FTS1 Disconnect %p\n", pVTab));
  fulltext_vtab_destroy(v);
  return SQLITE_OK;
}
/*
** Destroy the virtual table: run the SQL that drops the %_content and
** %_term tables and then hand the vtab to fulltext_vtab_destroy().
** If the SQL fails, its error code is returned and the vtab is left
** intact.
*/
static int fulltextDestroy(sqlite3_vtab *pVTab){
  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  int rc;

  TRACE(("FTS1 Destroy %p\n", pVTab));
  rc = sql_exec(v->db, v->zName,
                "drop table %_content; drop table %_term");
  if( rc==SQLITE_OK ){
    fulltext_vtab_destroy(v);
  }
  return rc;
}
static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
fulltext_cursor *c;
c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -