📄 fulltext.c
/* Advance the cursor to the next row of the current scan. */
static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  sqlite_int64 iDocid;
  int rc;

  switch( c->iCursorType ){
    case QUERY_GENERIC:
      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
      rc = sqlite3_step(c->pStmt);
      switch( rc ){
        case SQLITE_ROW:
          c->eof = 0;
          return SQLITE_OK;
        case SQLITE_DONE:
          c->eof = 1;
          return SQLITE_OK;
        default:
          c->eof = 1;
          return rc;
      }
    case QUERY_FULLTEXT:
      rc = sqlite3_reset(c->pStmt);
      if( rc!=SQLITE_OK ) return rc;

      if( readerAtEnd(&c->result)){
        c->eof = 1;
        return SQLITE_OK;
      }
      iDocid = readDocid(&c->result);
      rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
      if( rc!=SQLITE_OK ) return rc;
      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
      rc = sqlite3_step(c->pStmt);
      if( rc==SQLITE_ROW ){   /* the case we expect */
        c->eof = 0;
        return SQLITE_OK;
      }
      /* an error occurred; abort */
      return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
    default:
      assert( 0 );
      return SQLITE_ERROR;  /* not reached */
  }
}

/* Fetch the doclist chunks stored for [pTerm] in the %_term table.
 * (*pStmt) caches the prepared statement across calls; the caller passes
 * NULL the first time. */
static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm,
                               sqlite3_stmt **ppStmt){
  int rc;
  if( *ppStmt ){
    rc = sqlite3_reset(*ppStmt);
  } else {
    rc = sql_prepare(v->db, v->zName, ppStmt,
        "select doclist from %_term where term = ? order by first");
  }
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT);
  if( rc!=SQLITE_OK ) return rc;

  return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */
}

/* Read the posting list for [zTerm]; AND it with the doclist [in] to
 * produce the doclist [out], using the given offset [iOffset] for phrase
 * matching.
 * (*pSelect) is used to hold an SQLite statement used inside this function;
 * the caller should initialize *pSelect to NULL before the first call.
 */
static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect,
                       const char *zTerm,
                       DocList *pIn, int iOffset, DocList *out){
  int rc;
  DocListMerge merge;

  if( pIn!=NULL && !pIn->nData ){
    /* If [pIn] is already empty, there's no point in reading the
     * posting list to AND it in; return immediately. */
    return SQLITE_OK;
  }

  rc = term_select_doclist(v, zTerm, -1, pSelect);
  if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;

  mergeInit(&merge, pIn, iOffset, out);
  while( rc==SQLITE_ROW ){
    DocList block;
    docListInit(&block, DL_POSITIONS_OFFSETS,
                sqlite3_column_blob(*pSelect, 0),
                sqlite3_column_bytes(*pSelect, 0));
    mergeBlock(&merge, &block);
    docListDestroy(&block);

    rc = sqlite3_step(*pSelect);
    if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
      return rc;
    }
  }

  return SQLITE_OK;
}

typedef struct QueryTerm {
  int is_phrase;        /* true if this term begins a new phrase */
  const char *zTerm;
} QueryTerm;

/* A parsed query.
 *
 * As an example, parsing the query ["four score" years "new nation"] will
 * yield a Query with 5 terms:
 *   "four", is_phrase = 1
 *   "score", is_phrase = 0
 *   "years", is_phrase = 1
 *   "new", is_phrase = 1
 *   "nation", is_phrase = 0
 */
typedef struct Query {
  int nTerms;
  QueryTerm *pTerm;
} Query;

/* Append a term to [q], growing the term array as needed.  [q] takes
 * ownership of [zTerm]. */
static void query_add(Query *q, int is_phrase, const char *zTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0]));
  t = &q->pTerm[q->nTerms - 1];
  t->is_phrase = is_phrase;
  t->zTerm = zTerm;
}

/* Release the term strings and the term array owned by [q]. */
static void query_free(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free((void *) q->pTerm[i].zTerm);
  }
  free(q->pTerm);
}

/* Tokenize one segment of the query string (either inside or outside a
 * quoted phrase) and append each token to [pQuery]. */
static int tokenize_segment(sqlite3_tokenizer *pTokenizer,
                            const char *zQuery, int in_phrase,
                            Query *pQuery){
  sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int is_first = 1;

  int rc = pModule->xOpen(pTokenizer, zQuery, -1, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *zToken;
    int nToken, iStartOffset, iEndOffset, dummy_pos;

    rc = pModule->xNext(pCursor,
                        &zToken, &nToken,
                        &iStartOffset, &iEndOffset,
                        &dummy_pos);
    if( rc!=SQLITE_OK ) break;

    query_add(pQuery, !in_phrase || is_first, string_dup_n(zToken, nToken));
    is_first = 0;
  }

  return pModule->xClose(pCursor);
}

/* Parse a query string, yielding a Query object. */
static int parse_query(fulltext_vtab *v, const char *zQuery, Query *pQuery){
  char *zQuery1 = string_dup(zQuery);
  int in_phrase = 0;
  char *s = zQuery1;
  pQuery->nTerms = 0;
  pQuery->pTerm = NULL;

  while( *s ){
    char *t = s;
    while( *t ){
      if( *t=='"' ){
        *t++ = '\0';
        break;
      }
      ++t;
    }
    if( *s ){
      tokenize_segment(v->pTokenizer, s, in_phrase, pQuery);
    }
    s = t;
    in_phrase = !in_phrase;
  }

  free(zQuery1);
  return SQLITE_OK;
}

/* Perform a full-text query; return a list of documents in [pResult]. */
static int fulltext_query(fulltext_vtab *v, const char *zQuery,
                          DocList **pResult){
  Query q;
  int phrase_start = -1;
  int i;
  sqlite3_stmt *pSelect = NULL;
  DocList *d = NULL;

  int rc = parse_query(v, zQuery, &q);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge terms. */
  for(i = 0 ; i < q.nTerms ; ++i){
    /* In each merge step, we need to generate positions whenever we're
     * processing a phrase which hasn't ended yet. */
    int need_positions = i<q.nTerms-1 && !q.pTerm[i+1].is_phrase;
    DocList *next = docListNew(need_positions ?
                               DL_POSITIONS : DL_DOCIDS);
    if( q.pTerm[i].is_phrase ){
      phrase_start = i;
    }
    rc = query_merge(v, &pSelect, q.pTerm[i].zTerm, d, i - phrase_start, next);
    if( rc!=SQLITE_OK ) break;
    if( d!=NULL ){
      docListDelete(d);
    }
    d = next;
  }

  sqlite3_finalize(pSelect);
  query_free(&q);
  *pResult = d;
  return rc;
}

/* Begin a scan: either a full scan of %_content or a full-text search,
 * depending on the index number chosen by xBestIndex. */
static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
                          int idxNum, const char *idxStr,
                          int argc, sqlite3_value **argv){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  const char *zStatement;

  c->iCursorType = idxNum;
  switch( idxNum ){
    case QUERY_GENERIC:
      zStatement = "select rowid, content from %_content";
      break;

    case QUERY_FULLTEXT:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( argc==1 );
      rc = fulltext_query(v, zQuery, &pResult);
      if( rc!=SQLITE_OK ) return rc;
      readerInit(&c->result, pResult);
      zStatement = "select rowid, content from %_content where rowid = ?";
      break;
    }

    default:
      assert( 0 );
  }

  rc = sql_prepare(v->db, v->zName, &c->pStmt, zStatement);
  if( rc!=SQLITE_OK ) return rc;

  return fulltextNext(pCursor);
}

/* Return true if the cursor has been moved past the last row. */
static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  return c->eof;
}

/* Report the value of the single (content) column for the current row. */
static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
                          sqlite3_context *pContext, int idxCol){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  const char *s;

  assert( idxCol==0 );
  s = (const char *) sqlite3_column_text(c->pStmt, 1);
  sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT);

  return SQLITE_OK;
}

/* Report the rowid of the current row. */
static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

/* Build a hash table containing all terms in zText. */
static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer,
                       const char *zText, sqlite_int64 iDocid){
  sqlite3_tokenizer_cursor *pCursor;
  const char *pToken;
  int nTokenBytes;
  int iStartOffset, iEndOffset, iPosition;

  int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  HashInit(terms, HASH_STRING, 1);
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){
    DocList *p;

    /* Positions can't be negative; we use -1 as a terminator internally. */
    if( iPosition<0 ) {
      rc = SQLITE_ERROR;
      goto err;
    }

    p = HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_POSITIONS_OFFSETS);
      docListAddDocid(p, iDocid);
      HashInsert(terms, pToken, nTokenBytes, p);
    }
    docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset);
  }

err:
  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*
  */
  pTokenizer->pModule->xClose(pCursor);
  return rc;
}

/* Update the %_terms table to map the term [zTerm] to the given rowid.
 */
static int index_insert_term(fulltext_vtab *v, const char *zTerm, int nTerm,
                             sqlite_int64 iDocid, DocList *p){
  sqlite_int64 iFirst;
  sqlite_int64 iIndexRow;
  DocList doclist;

  int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
  if( rc==SQLITE_DONE ){
    docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
    if( docListUpdate(&doclist, iDocid, p) ){
      rc = term_insert(v, zTerm, nTerm, iDocid, &doclist);
      docListDestroy(&doclist);
      return rc;
    }
    return SQLITE_OK;
  }
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;

  /* This word is in the index; add this document ID to its blob. */
  rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
  if( rc!=SQLITE_OK ) return rc;

  if( docListUpdate(&doclist, iDocid, p) ){
    /* If the blob is too big, split it in half. */
    if( doclist.nData>CHUNK_MAX ){
      DocList half;
      if( docListSplit(&doclist, &half) ){
        rc = term_insert(v, zTerm, nTerm, firstDocid(&half), &half);
        docListDestroy(&half);
        if( rc!=SQLITE_OK ) goto err;
      }
    }
    rc = term_update(v, iIndexRow, &doclist);
  }

err:
  docListDestroy(&doclist);
  return rc;
}

/* Insert a row into the full-text index; set *piRowid to be the ID of the
 * new row. */
static int index_insert(fulltext_vtab *v,
                        sqlite3_value *pRequestRowid, const char *zText,
                        sqlite_int64 *piRowid){
  Hash terms;   /* maps term string -> PosList */
  HashElem *e;

  int rc = content_insert(v, pRequestRowid, zText, -1);
  if( rc!=SQLITE_OK ) return rc;
  *piRowid = sqlite3_last_insert_rowid(v->db);

  if( !zText ) return SQLITE_OK;   /* nothing to index */

  rc = build_terms(&terms, v->pTokenizer, zText, *piRowid);
  if( rc!=SQLITE_OK ) return rc;

  for(e=HashFirst(&terms); e; e=HashNext(e)){
    DocList *p = HashData(e);
    rc = index_insert_term(v, HashKey(e), HashKeysize(e), *piRowid, p);
    if( rc!=SQLITE_OK ) break;
  }

  for(e=HashFirst(&terms); e; e=HashNext(e)){
    DocList *p = HashData(e);
    docListDelete(p);
  }
  HashClear(&terms);
  return rc;
}

static int index_delete_term(fulltext_vtab *v, const char *zTerm, int nTerm,
                             sqlite_int64 iDocid){
  sqlite_int64 iFirst;
  sqlite_int64 iIndexRow;
  DocList doclist;

  int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;

  rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
  if( rc!=SQLITE_OK ) return rc;

  if( docListUpdate(&doclist, iDocid, NULL) ){
    if( doclist.nData>0 ){
      rc = term_update(v, iIndexRow, &doclist);
    } else {  /* empty posting list */
      rc = term_delete(v, iIndexRow);
    }
  }
  docListDestroy(&doclist);
  return rc;
}

/* Delete a row from the full-text index.
 */
static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
  char *zText;
  Hash terms;
  HashElem *e;

  int rc = content_select(v, iRow, &zText);
  if( rc!=SQLITE_OK ) return rc;

  rc = build_terms(&terms, v->pTokenizer, zText, iRow);
  free(zText);
  if( rc!=SQLITE_OK ) return rc;

  for(e=HashFirst(&terms); e; e=HashNext(e)){
    rc = index_delete_term(v, HashKey(e), HashKeysize(e), iRow);
    if( rc!=SQLITE_OK ) break;
  }
  for(e=HashFirst(&terms); e; e=HashNext(e)){
    DocList *p = HashData(e);
    docListDelete(p);
  }
  HashClear(&terms);

  return content_delete(v, iRow);
}

/* xUpdate method: handles DELETE and INSERT; updating an existing row is
 * not yet supported. */
static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                          sqlite_int64 *pRowid){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;

  if( nArg<2 ){
    return index_delete(v, sqlite3_value_int64(ppArg[0]));
  }

  if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    return SQLITE_ERROR;   /* an update; not yet supported */
  }

  assert( nArg==3 );   /* ppArg[1] = rowid, ppArg[2] = content */
  return index_insert(v, ppArg[1],
                      (const char *)sqlite3_value_text(ppArg[2]), pRowid);
}

static sqlite3_module fulltextModule = {
  0,
  fulltextCreate,
  fulltextConnect,
  fulltextBestIndex,
  fulltextDisconnect,
  fulltextDestroy,
  fulltextOpen,
  fulltextClose,
  fulltextFilter,
  fulltextNext,
  fulltextEof,
  fulltextColumn,
  fulltextRowid,
  fulltextUpdate
};

/* Register the "fulltext" module with the given database connection. */
int fulltext_init(sqlite3 *db){
  return sqlite3_create_module(db, "fulltext", &fulltextModule, 0);
}

#if !SQLITE_CORE
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
                           const sqlite3_api_routines *pApi){
  SQLITE_EXTENSION_INIT2(pApi)
  return fulltext_init(db);
}
#endif
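
For reference, below is a minimal usage sketch of the module registered above. Only fulltext_init() and the single content column are taken from the code shown here; fulltextCreate() and fulltextBestIndex() are not part of this excerpt, so the CREATE VIRTUAL TABLE argument list and the MATCH operator used in the SELECT are assumptions, and the table name, the driver file name, and the helper functions (run, print_row) are purely illustrative. The query text mirrors the phrase example from the Query comment.

/* usage_example.c -- hypothetical driver, not part of fulltext.c */
#include <stdio.h>
#include "sqlite3.h"

int fulltext_init(sqlite3 *db);   /* from fulltext.c above */

/* Print one result row of the SELECT below (rowid, content). */
static int print_row(void *pArg, int nCol, char **azVal, char **azCol){
  (void)pArg; (void)nCol; (void)azCol;
  printf("rowid=%s content=%s\n", azVal[0], azVal[1]);
  return 0;
}

/* Run one SQL statement and report any error message. */
static int run(sqlite3 *db, const char *zSql){
  char *zErr = 0;
  int rc = sqlite3_exec(db, zSql, print_row, 0, &zErr);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "error: %s\n", zErr ? zErr : "unknown");
    sqlite3_free(zErr);
  }
  return rc;
}

int main(void){
  sqlite3 *db;
  if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;
  if( fulltext_init(db)!=SQLITE_OK ) return 1;   /* register the module */

  /* Assumed syntax: check fulltextCreate() for the real argument list
  ** and fulltextBestIndex() for the operator that maps to QUERY_FULLTEXT. */
  run(db, "CREATE VIRTUAL TABLE pages USING fulltext");
  run(db, "INSERT INTO pages(content) VALUES('four score and seven years ago')");
  run(db, "SELECT rowid, content FROM pages WHERE content MATCH '\"four score\" years'");

  sqlite3_close(db);
  return 0;
}

To try it, compile this driver together with fulltext.c (and whatever tokenizer and doclist sources that file depends on) and link against SQLite.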