⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fts1.c

📁 sqlite-3.4.1,嵌入式数据库.是一个功能强大的开源数据库,给学习和研发以及小型公司的发展带来了全所未有的好处.
💻 C
📖 第 1 页 / 共 5 页
字号:
/* The author disclaims copyright to this source code. * * This is an SQLite module implementing full-text search. *//*** The code in this file is only compiled if:****     * The FTS1 module is being built as an extension**       (in which case SQLITE_CORE is not defined), or****     * The FTS1 module is being built into the core of**       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).*/#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)# define SQLITE_CORE 1#endif#include <assert.h>#if !defined(__APPLE__)#include <malloc.h>#else#include <stdlib.h>#endif#include <stdio.h>#include <string.h>#include <ctype.h>#include "fts1.h"#include "fts1_hash.h"#include "fts1_tokenizer.h"#include "sqlite3.h"#include "sqlite3ext.h"SQLITE_EXTENSION_INIT1#if 0# define TRACE(A)  printf A; fflush(stdout)#else# define TRACE(A)#endif/* utility functions */typedef struct StringBuffer {  int len;      /* length, not including null terminator */  int alloced;  /* Space allocated for s[] */   char *s;      /* Content of the string */} StringBuffer;static void initStringBuffer(StringBuffer *sb){  sb->len = 0;  sb->alloced = 100;  sb->s = malloc(100);  sb->s[0] = '\0';}static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){  if( sb->len + nFrom >= sb->alloced ){    sb->alloced = sb->len + nFrom + 100;    sb->s = realloc(sb->s, sb->alloced+1);    if( sb->s==0 ){      initStringBuffer(sb);      return;    }  }  memcpy(sb->s + sb->len, zFrom, nFrom);  sb->len += nFrom;  sb->s[sb->len] = 0;}static void append(StringBuffer *sb, const char *zFrom){  nappend(sb, zFrom, strlen(zFrom));}/* We encode variable-length integers in little-endian order using seven bits * per byte as follows:**** KEY:**         A = 0xxxxxxx    7 bits of data and one flag bit**         B = 1xxxxxxx    7 bits of data and one flag bit****  7 bits - A** 14 bits - BA** 21 bits - BBA** and so on.*//* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */#define VARINT_MAX 10/* Write a 64-bit variable-length integer to memory starting at p[0]. * The length of data written will be between 1 and VARINT_MAX bytes. * The number of bytes written is returned. */static int putVarint(char *p, sqlite_int64 v){  unsigned char *q = (unsigned char *) p;  sqlite_uint64 vu = v;  do{    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);    vu >>= 7;  }while( vu!=0 );  q[-1] &= 0x7f;  /* turn off high bit in final byte */  assert( q - (unsigned char *)p <= VARINT_MAX );  return (int) (q - (unsigned char *)p);}/* Read a 64-bit variable-length integer from memory starting at p[0]. * Return the number of bytes read, or 0 on error. * The value is stored in *v. */static int getVarint(const char *p, sqlite_int64 *v){  const unsigned char *q = (const unsigned char *) p;  sqlite_uint64 x = 0, y = 1;  while( (*q & 0x80) == 0x80 ){    x += y * (*q++ & 0x7f);    y <<= 7;    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */      assert( 0 );      return 0;    }  }  x += y * (*q++);  *v = (sqlite_int64) x;  return (int) (q - (unsigned char *)p);}static int getVarint32(const char *p, int *pi){ sqlite_int64 i; int ret = getVarint(p, &i); *pi = (int) i; assert( *pi==i ); return ret;}/*** Document lists *** * * A document list holds a sorted list of varint-encoded document IDs. * * A doclist with type DL_POSITIONS_OFFSETS is stored like this: * * array { *   varint docid; *   array { *     varint position;     (delta from previous position plus POS_BASE) *     varint startOffset;  (delta from previous startOffset) *     varint endOffset;    (delta from startOffset) *   } * } * * Here, array { X } means zero or more occurrences of X, adjacent in memory. * * A position list may hold positions for text in multiple columns.  A position * POS_COLUMN is followed by a varint containing the index of the column for * following positions in the list.  Any positions appearing before any * occurrences of POS_COLUMN are for column 0. * * A doclist with type DL_POSITIONS is like the above, but holds only docids * and positions without offset information. * * A doclist with type DL_DOCIDS is like the above, but holds only docids * without positions or offset information. * * On disk, every document list has positions and offsets, so we don't bother * to serialize a doclist's type. *  * We don't yet delta-encode document IDs; doing so will probably be a * modest win. * * NOTE(shess) I've thought of a slightly (1%) better offset encoding. * After the first offset, estimate the next offset by using the * current token position and the previous token position and offset, * offset to handle some variance.  So the estimate would be * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded * as normal.  Offsets more than 64 chars from the estimate are * encoded as the delta to the previous start offset + 128.  An * additional tiny increment can be gained by using the end offset of * the previous token to make the estimate a tiny bit more precise.*//* It is not safe to call isspace(), tolower(), or isalnum() on** hi-bit-set characters.  This is the same solution used in the** tokenizer.*//* TODO(shess) The snippet-generation code should be using the** tokenizer-generated tokens rather than doing its own local** tokenization.*//* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */static int safe_isspace(char c){  return (c&0x80)==0 ? isspace(c) : 0;}static int safe_tolower(char c){  return (c&0x80)==0 ? tolower(c) : c;}static int safe_isalnum(char c){  return (c&0x80)==0 ? isalnum(c) : 0;}typedef enum DocListType {  DL_DOCIDS,              /* docids only */  DL_POSITIONS,           /* docids + positions */  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */} DocListType;/*** By default, only positions and not offsets are stored in the doclists.** To change this so that offsets are stored too, compile with****          -DDL_DEFAULT=DL_POSITIONS_OFFSETS***/#ifndef DL_DEFAULT# define DL_DEFAULT DL_POSITIONS#endiftypedef struct DocList {  char *pData;  int nData;  DocListType iType;  int iLastColumn;    /* the last column written */  int iLastPos;       /* the last position written */  int iLastOffset;    /* the last start offset written */} DocList;enum {  POS_END = 0,        /* end of this position list */  POS_COLUMN,         /* followed by new column number */  POS_BASE};/* Initialize a new DocList to hold the given data. */static void docListInit(DocList *d, DocListType iType,                        const char *pData, int nData){  d->nData = nData;  if( nData>0 ){    d->pData = malloc(nData);    memcpy(d->pData, pData, nData);  } else {    d->pData = NULL;  }  d->iType = iType;  d->iLastColumn = 0;  d->iLastPos = d->iLastOffset = 0;}/* Create a new dynamically-allocated DocList. */static DocList *docListNew(DocListType iType){  DocList *d = (DocList *) malloc(sizeof(DocList));  docListInit(d, iType, 0, 0);  return d;}static void docListDestroy(DocList *d){  free(d->pData);#ifndef NDEBUG  memset(d, 0x55, sizeof(*d));#endif}static void docListDelete(DocList *d){  docListDestroy(d);  free(d);}static char *docListEnd(DocList *d){  return d->pData + d->nData;}/* Append a varint to a DocList's data. */static void appendVarint(DocList *d, sqlite_int64 i){  char c[VARINT_MAX];  int n = putVarint(c, i);  d->pData = realloc(d->pData, d->nData + n);  memcpy(d->pData + d->nData, c, n);  d->nData += n;}static void docListAddDocid(DocList *d, sqlite_int64 iDocid){  appendVarint(d, iDocid);  if( d->iType>=DL_POSITIONS ){    appendVarint(d, POS_END);  /* initially empty position list */    d->iLastColumn = 0;    d->iLastPos = d->iLastOffset = 0;  }}/* helper function for docListAddPos and docListAddPosOffset */static void addPos(DocList *d, int iColumn, int iPos){  assert( d->nData>0 );  --d->nData;  /* remove previous terminator */  if( iColumn!=d->iLastColumn ){    assert( iColumn>d->iLastColumn );    appendVarint(d, POS_COLUMN);    appendVarint(d, iColumn);    d->iLastColumn = iColumn;    d->iLastPos = d->iLastOffset = 0;  }  assert( iPos>=d->iLastPos );  appendVarint(d, iPos-d->iLastPos+POS_BASE);  d->iLastPos = iPos;}/* Add a position to the last position list in a doclist. */static void docListAddPos(DocList *d, int iColumn, int iPos){  assert( d->iType==DL_POSITIONS );  addPos(d, iColumn, iPos);  appendVarint(d, POS_END);  /* add new terminator */}/*** Add a position and starting and ending offsets to a doclist.**** If the doclist is setup to handle only positions, then insert** the position only and ignore the offsets.*/static void docListAddPosOffset(  DocList *d,             /* Doclist under construction */  int iColumn,            /* Column the inserted term is part of */  int iPos,               /* Position of the inserted term */  int iStartOffset,       /* Starting offset of inserted term */  int iEndOffset          /* Ending offset of inserted term */){  assert( d->iType>=DL_POSITIONS );  addPos(d, iColumn, iPos);  if( d->iType==DL_POSITIONS_OFFSETS ){    assert( iStartOffset>=d->iLastOffset );    appendVarint(d, iStartOffset-d->iLastOffset);    d->iLastOffset = iStartOffset;    assert( iEndOffset>=iStartOffset );    appendVarint(d, iEndOffset-iStartOffset);  }  appendVarint(d, POS_END);  /* add new terminator */}/*** A DocListReader object is a cursor into a doclist.  Initialize** the cursor to the beginning of the doclist by calling readerInit().** Then use routines****      peekDocid()**      readDocid()**      readPosition()**      skipPositionList()**      and so forth...**** to read information out of the doclist.  When we reach the end** of the doclist, atEnd() returns TRUE.*/typedef struct DocListReader {  DocList *pDoclist;  /* The document list we are stepping through */  char *p;            /* Pointer to next unread byte in the doclist */  int iLastColumn;  int iLastPos;  /* the last position read, or -1 when not in a position list */} DocListReader;/*** Initialize the DocListReader r to point to the beginning of pDoclist.*/static void readerInit(DocListReader *r, DocList *pDoclist){  r->pDoclist = pDoclist;  if( pDoclist!=NULL ){    r->p = pDoclist->pData;  }  r->iLastColumn = -1;  r->iLastPos = -1;}/*** Return TRUE if we have reached then end of pReader and there is** nothing else left to read.*/static int atEnd(DocListReader *pReader){  return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));}/* Peek at the next docid without advancing the read pointer. */static sqlite_int64 peekDocid(DocListReader *pReader){  sqlite_int64 ret;  assert( !atEnd(pReader) );  assert( pReader->iLastPos==-1 );  getVarint(pReader->p, &ret);  return ret;}/* Read the next docid.   See also nextDocid().*/static sqlite_int64 readDocid(DocListReader *pReader){  sqlite_int64 ret;  assert( !atEnd(pReader) );  assert( pReader->iLastPos==-1 );  pReader->p += getVarint(pReader->p, &ret);  if( pReader->pDoclist->iType>=DL_POSITIONS ){    pReader->iLastColumn = 0;    pReader->iLastPos = 0;  }  return ret;}/* Read the next position and column index from a position list. * Returns the position, or -1 at the end of the list. */static int readPosition(DocListReader *pReader, int *iColumn){  int i;  int iType = pReader->pDoclist->iType;  if( pReader->iLastPos==-1 ){    return -1;  }  assert( !atEnd(pReader) );  if( iType<DL_POSITIONS ){    return -1;  }  pReader->p += getVarint32(pReader->p, &i);  if( i==POS_END ){    pReader->iLastColumn = pReader->iLastPos = -1;    *iColumn = -1;    return -1;  }  if( i==POS_COLUMN ){    pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);    pReader->iLastPos = 0;    pReader->p += getVarint32(pReader->p, &i);    assert( i>=POS_BASE );  }  pReader->iLastPos += ((int) i)-POS_BASE;  if( iType>=DL_POSITIONS_OFFSETS ){    /* Skip over offsets, ignoring them for now. */    int iStart, iEnd;    pReader->p += getVarint32(pReader->p, &iStart);    pReader->p += getVarint32(pReader->p, &iEnd);  }  *iColumn = pReader->iLastColumn;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -