📄 fts2.c

📁 sqlite-3.4.1,嵌入式数据库.是一个功能强大的开源数据库,给学习和研发以及小型公司的发展带来了全所未有的好处.
💻 C
📖 第 1 页 / 共 5 页
字号:
static void dlwCopy(DLWriter *pWriter, DLReader *pReader){  dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),            dlrDocid(pReader), dlrDocid(pReader));}static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){  char c[VARINT_MAX];  int n = putVarint(c, iDocid-pWriter->iPrevDocid);  /* Docids must ascend. */  assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );  assert( pWriter->iType==DL_DOCIDS );  dataBufferAppend(pWriter->b, c, n);  pWriter->iPrevDocid = iDocid;#ifndef NDEBUG  pWriter->has_iPrevDocid = 1;#endif}/*******************************************************************//* PLReader is used to read data from a document's position list.  As** the caller steps through the list, data is cached so that varints** only need to be decoded once.**** plrInit, plrDestroy - create/destroy a reader.** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors** plrAtEnd - at end of stream, only call plrDestroy once true.** plrStep - step to the next element.*/typedef struct PLReader {  /* These refer to the next position's data.  nData will reach 0 when  ** reading the last position, so plrStep() signals EOF by setting  ** pData to NULL.  */  const char *pData;  int nData;  DocListType iType;  int iColumn;         /* the last column read */  int iPosition;       /* the last position read */  int iStartOffset;    /* the last start offset read */  int iEndOffset;      /* the last end offset read */} PLReader;static int plrAtEnd(PLReader *pReader){  return pReader->pData==NULL;}static int plrColumn(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iColumn;}static int plrPosition(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iPosition;}static int plrStartOffset(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iStartOffset;}static int plrEndOffset(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iEndOffset;}static void plrStep(PLReader *pReader){  int i, n;  assert( !plrAtEnd(pReader) );  if( pReader->nData==0 ){    pReader->pData = NULL;    return;  }  n = getVarint32(pReader->pData, &i);  if( i==POS_COLUMN ){    n += getVarint32(pReader->pData+n, &pReader->iColumn);    pReader->iPosition = 0;    pReader->iStartOffset = 0;    n += getVarint32(pReader->pData+n, &i);  }  /* Should never see adjacent column changes. */  assert( i!=POS_COLUMN );  if( i==POS_END ){    pReader->nData = 0;    pReader->pData = NULL;    return;  }  pReader->iPosition += i-POS_BASE;  if( pReader->iType==DL_POSITIONS_OFFSETS ){    n += getVarint32(pReader->pData+n, &i);    pReader->iStartOffset += i;    n += getVarint32(pReader->pData+n, &i);    pReader->iEndOffset = pReader->iStartOffset+i;  }  assert( n<=pReader->nData );  pReader->pData += n;  pReader->nData -= n;}static void plrInit(PLReader *pReader, DLReader *pDLReader){  pReader->pData = dlrPosData(pDLReader);  pReader->nData = dlrPosDataLen(pDLReader);  pReader->iType = pDLReader->iType;  pReader->iColumn = 0;  pReader->iPosition = 0;  pReader->iStartOffset = 0;  pReader->iEndOffset = 0;  plrStep(pReader);}static void plrDestroy(PLReader *pReader){  SCRAMBLE(pReader);}/*******************************************************************//* PLWriter is used in constructing a document's position list.  As a** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.** PLWriter writes to the associated DLWriter's buffer.**** plwInit - init for writing a document's poslist.** plwDestroy - clear a writer.** plwAdd - append position and offset information.** plwCopy - copy next position's data from reader to writer.** plwTerminate - add any necessary doclist terminator.**** Calling plwAdd() after plwTerminate() may result in a corrupt** doclist.*//* TODO(shess) Until we've written the second item, we can cache the** first item's information.  Then we'd have three states:**** - initialized with docid, no positions.** - docid and one position.** - docid and multiple positions.**** Only the last state needs to actually write to dlw->b, which would** be an improvement in the DLCollector case.*/typedef struct PLWriter {  DLWriter *dlw;  int iColumn;    /* the last column written */  int iPos;       /* the last position written */  int iOffset;    /* the last start offset written */} PLWriter;/* TODO(shess) In the case where the parent is reading these values** from a PLReader, we could optimize to a copy if that PLReader has** the same type as pWriter.*/static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,                   int iStartOffset, int iEndOffset){  /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,  ** iStartOffsetDelta, and iEndOffsetDelta.  */  char c[5*VARINT_MAX];  int n = 0;  /* Ban plwAdd() after plwTerminate(). */  assert( pWriter->iPos!=-1 );  if( pWriter->dlw->iType==DL_DOCIDS ) return;  if( iColumn!=pWriter->iColumn ){    n += putVarint(c+n, POS_COLUMN);    n += putVarint(c+n, iColumn);    pWriter->iColumn = iColumn;    pWriter->iPos = 0;    pWriter->iOffset = 0;  }  assert( iPos>=pWriter->iPos );  n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));  pWriter->iPos = iPos;  if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){    assert( iStartOffset>=pWriter->iOffset );    n += putVarint(c+n, iStartOffset-pWriter->iOffset);    pWriter->iOffset = iStartOffset;    assert( iEndOffset>=iStartOffset );    n += putVarint(c+n, iEndOffset-iStartOffset);  }  dataBufferAppend(pWriter->dlw->b, c, n);}static void plwCopy(PLWriter *pWriter, PLReader *pReader){  plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader),         plrStartOffset(pReader), plrEndOffset(pReader));}static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){  char c[VARINT_MAX];  int n;  pWriter->dlw = dlw;  /* Docids must ascend. */  assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid );  n = putVarint(c, iDocid-pWriter->dlw->iPrevDocid);  dataBufferAppend(pWriter->dlw->b, c, n);  pWriter->dlw->iPrevDocid = iDocid;#ifndef NDEBUG  pWriter->dlw->has_iPrevDocid = 1;#endif  pWriter->iColumn = 0;  pWriter->iPos = 0;  pWriter->iOffset = 0;}/* TODO(shess) Should plwDestroy() also terminate the doclist?  But** then plwDestroy() would no longer be just a destructor, it would** also be doing work, which isn't consistent with the overall idiom.** Another option would be for plwAdd() to always append any necessary** terminator, so that the output is always correct.  But that would** add incremental work to the common case with the only benefit being** API elegance.  Punt for now.*/static void plwTerminate(PLWriter *pWriter){  if( pWriter->dlw->iType>DL_DOCIDS ){    char c[VARINT_MAX];    int n = putVarint(c, POS_END);    dataBufferAppend(pWriter->dlw->b, c, n);  }#ifndef NDEBUG  /* Mark as terminated for assert in plwAdd(). */  pWriter->iPos = -1;#endif}static void plwDestroy(PLWriter *pWriter){  SCRAMBLE(pWriter);}/*******************************************************************//* DLCollector wraps PLWriter and DLWriter to provide a** dynamically-allocated doclist area to use during tokenization.**** dlcNew - malloc up and initialize a collector.** dlcDelete - destroy a collector and all contained items.** dlcAddPos - append position and offset information.** dlcAddDoclist - add the collected doclist to the given buffer.** dlcNext - terminate the current document and open another.*/typedef struct DLCollector {  DataBuffer b;  DLWriter dlw;  PLWriter plw;} DLCollector;/* TODO(shess) This could also be done by calling plwTerminate() and** dataBufferAppend().  I tried that, expecting nominal performance** differences, but it seemed to pretty reliably be worth 1% to code** it this way.  I suspect it's the incremental malloc overhead (some** percentage of the plwTerminate() calls will cause a realloc), so** this might be worth revisiting if the DataBuffer implementation** changes.*/static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){  if( pCollector->dlw.iType>DL_DOCIDS ){    char c[VARINT_MAX];    int n = putVarint(c, POS_END);    dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);  }else{    dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);  }}static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){  plwTerminate(&pCollector->plw);  plwDestroy(&pCollector->plw);  plwInit(&pCollector->plw, &pCollector->dlw, iDocid);}static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,                      int iStartOffset, int iEndOffset){  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);}static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){  DLCollector *pCollector = malloc(sizeof(DLCollector));  dataBufferInit(&pCollector->b, 0);  dlwInit(&pCollector->dlw, iType, &pCollector->b);  plwInit(&pCollector->plw, &pCollector->dlw, iDocid);  return pCollector;}static void dlcDelete(DLCollector *pCollector){  plwDestroy(&pCollector->plw);  dlwDestroy(&pCollector->dlw);  dataBufferDestroy(&pCollector->b);  SCRAMBLE(pCollector);  free(pCollector);}/* Copy the doclist data of iType in pData/nData into *out, trimming** unnecessary data as we go.  Only columns matching iColumn are** copied, all columns copied if iColumn is -1.  Elements with no** matching columns are dropped.  The output is an iOutType doclist.*//* NOTE(shess) This code is only valid after all doclists are merged.** If this is run before merges, then doclist items which represent** deletion will be trimmed, and will thus not effect a deletion** during the merge.*/static void docListTrim(DocListType iType, const char *pData, int nData,                        int iColumn, DocListType iOutType, DataBuffer *out){  DLReader dlReader;  DLWriter dlWriter;  assert( iOutType<=iType );  dlrInit(&dlReader, iType, pData, nData);  dlwInit(&dlWriter, iOutType, out);  while( !dlrAtEnd(&dlReader) ){    PLReader plReader;    PLWriter plWriter;    int match = 0;    plrInit(&plReader, &dlReader);    while( !plrAtEnd(&plReader) ){      if( iColumn==-1 || plrColumn(&plReader)==iColumn ){        if( !match ){          plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));          match = 1;        }        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),               plrStartOffset(&plReader), plrEndOffset(&plReader));      }      plrStep(&plReader);    }    if( match ){      plwTerminate(&plWriter);      plwDestroy(&plWriter);    }    plrDestroy(&plReader);    dlrStep(&dlReader);  }  dlwDestroy(&dlWriter);  dlrDestroy(&dlReader);}/* Used by docListMerge() to keep doclists in the ascending order by** docid, then ascending order by age (so the newest comes first).*/typedef struct OrderedDLReader {  DLReader *pReader;  /* TODO(shess) If we assume that docListMerge pReaders is ordered by  ** age (which we do), then we could use pReader comparisons to break  ** ties.  */  int idx;} OrderedDLReader;/* Order eof to end, then by docid asc, idx desc. */static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){  if( dlrAtEnd(r1->pReader) ){    if( dlrAtEnd(r2->pReader) ) return 0;  /* Both atEnd(). */    return 1;                              /* Only r1 atEnd(). */  }  if( dlrAtEnd(r2->pReader) ) return -1;   /* Only r2 atEnd(). */  if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1;  if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1;  /* Descending on idx. */  return r2->idx-r1->idx;}/* Bubble p[0] to appropriate place in p[1..n-1].  Assumes that** p[1..n-1] is already sorted.*//* TODO(shess) Is this frequent enough to warrant a binary search?** Before implementing that, instrument the code to check.  In most** current usage, I expect that p[0] will be less than p[1] a very** high proportion of the time.*/static void orderedDLReaderReorder(OrderedDLReader *p, int n){  while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){    OrderedDLReader tmp = p[0];    p[0] = p[1];    p[1] = tmp;    n--;    p++;  }}/* Given an array of doclist readers, merge their doclist elements** into out in sorted order (by docid), dropping elements from older** readers when there is a duplicate docid.  pReaders is assumed to be** ordered by age, oldest first.*//* TODO(shess) nReaders must be <= MERGE_COUNT.  This should probably** be fixed.*/static void docListMerge(DataBuffer *out,                         DLReader *pReaders, int nReaders){  OrderedDLReader readers[MERGE_COUNT];
💿 文件大小 2186 K
👤 上传用户 hahuhuhuhu
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#sqlite #嵌入式数据库 #发展 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -