📄 fts2.c

📁 轻量级数据库软件,嵌入式设计可以考虑考虑,性能不错
💻 C
📖 第 1 页 / 共 5 页
字号:
  int iEndOffset;      /* the last end offset read */} PLReader;static int plrAtEnd(PLReader *pReader){  return pReader->pData==NULL;}static int plrColumn(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iColumn;}static int plrPosition(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iPosition;}static int plrStartOffset(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iStartOffset;}static int plrEndOffset(PLReader *pReader){  assert( !plrAtEnd(pReader) );  return pReader->iEndOffset;}static void plrStep(PLReader *pReader){  int i, n;  assert( !plrAtEnd(pReader) );  if( pReader->nData==0 ){    pReader->pData = NULL;    return;  }  n = getVarint32(pReader->pData, &i);  if( i==POS_COLUMN ){    n += getVarint32(pReader->pData+n, &pReader->iColumn);    pReader->iPosition = 0;    pReader->iStartOffset = 0;    n += getVarint32(pReader->pData+n, &i);  }  /* Should never see adjacent column changes. */  assert( i!=POS_COLUMN );  if( i==POS_END ){    pReader->nData = 0;    pReader->pData = NULL;    return;  }  pReader->iPosition += i-POS_BASE;  if( pReader->iType==DL_POSITIONS_OFFSETS ){    n += getVarint32(pReader->pData+n, &i);    pReader->iStartOffset += i;    n += getVarint32(pReader->pData+n, &i);    pReader->iEndOffset = pReader->iStartOffset+i;  }  assert( n<=pReader->nData );  pReader->pData += n;  pReader->nData -= n;}static void plrInit(PLReader *pReader, DocListType iType,                    const char *pData, int nData){  pReader->pData = pData;  pReader->nData = nData;  pReader->iType = iType;  pReader->iColumn = 0;  pReader->iPosition = 0;  pReader->iStartOffset = 0;  pReader->iEndOffset = 0;  plrStep(pReader);}static void plrDestroy(PLReader *pReader){  SCRAMBLE(pReader);}/*******************************************************************//* PLWriter is used in constructing a document's position list.  As a** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.**** plwInit - init for writing a document's poslist.** plwReset - reset the writer for a new document.** plwDestroy - clear a writer.** plwNew - malloc storage and initialize it.** plwDelete - clear and free storage.** plwDlwAdd - append the docid and poslist to a doclist writer.** plwAdd - append position and offset information.*//* TODO(shess) PLWriter is used in two ways.  fulltextUpdate() uses it** in construction of a new doclist.  docListTrim() and mergePosList()** use it when trimming.  In the former case, it wants to own the** DataBuffer, in the latter it's possible it could encode into a** pre-existing DataBuffer.*/typedef struct PLWriter {  DataBuffer b;  sqlite_int64 iDocid;  DocListType iType;  int iColumn;    /* the last column written */  int iPos;       /* the last position written */  int iOffset;    /* the last start offset written */} PLWriter;static void plwDlwAdd(PLWriter *pWriter, DLWriter *dlWriter){  dlwAdd(dlWriter, pWriter->iDocid, pWriter->b.pData, pWriter->b.nData);}static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,                   int iStartOffset, int iEndOffset){  /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,  ** iStartOffsetDelta, and iEndOffsetDelta.  */  char c[5*VARINT_MAX];  int n = 0;  if( pWriter->iType==DL_DOCIDS ) return;  if( iColumn!=pWriter->iColumn ){    n += putVarint(c+n, POS_COLUMN);    n += putVarint(c+n, iColumn);    pWriter->iColumn = iColumn;    pWriter->iPos = 0;    pWriter->iOffset = 0;  }  assert( iPos>=pWriter->iPos );  n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));  pWriter->iPos = iPos;  if( pWriter->iType==DL_POSITIONS_OFFSETS ){    assert( iStartOffset>=pWriter->iOffset );    n += putVarint(c+n, iStartOffset-pWriter->iOffset);    pWriter->iOffset = iStartOffset;    assert( iEndOffset>=iStartOffset );    n += putVarint(c+n, iEndOffset-iStartOffset);  }  dataBufferAppend(&pWriter->b, c, n);}static void plwReset(PLWriter *pWriter,                     sqlite_int64 iDocid, DocListType iType){  dataBufferReset(&pWriter->b);  pWriter->iDocid = iDocid;  pWriter->iType = iType;  pWriter->iColumn = 0;  pWriter->iPos = 0;  pWriter->iOffset = 0;}static void plwInit(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){  dataBufferInit(&pWriter->b, 0);  plwReset(pWriter, iDocid, iType);}static PLWriter *plwNew(sqlite_int64 iDocid, DocListType iType){  PLWriter *pWriter = malloc(sizeof(PLWriter));  plwInit(pWriter, iDocid, iType);  return pWriter;}static void plwDestroy(PLWriter *pWriter){  dataBufferDestroy(&pWriter->b);  SCRAMBLE(pWriter);}static void plwDelete(PLWriter *pWriter){  plwDestroy(pWriter);  free(pWriter);}/* Copy the doclist data of iType in pData/nData into *out, trimming** unnecessary data as we go.  Only columns matching iColumn are** copied, all columns copied if iColimn is -1.  Elements with no** matching columns are dropped.  The output is an iOutType doclist.*/static void docListTrim(DocListType iType, const char *pData, int nData,                        int iColumn, DocListType iOutType, DataBuffer *out){  DLReader dlReader;  DLWriter dlWriter;  PLWriter plWriter;  assert( iOutType<=iType );  dlrInit(&dlReader, iType, pData, nData);  dlwInit(&dlWriter, iOutType, out);  plwInit(&plWriter, 0, iOutType);  while( !dlrAtEnd(&dlReader) ){    PLReader plReader;    int match = 0;    plrInit(&plReader, dlReader.iType,            dlrPosData(&dlReader), dlrPosDataLen(&dlReader));    plwReset(&plWriter, dlrDocid(&dlReader), iOutType);    while( !plrAtEnd(&plReader) ){      if( iColumn==-1 || plrColumn(&plReader)==iColumn ){        match = 1;        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),               plrStartOffset(&plReader), plrEndOffset(&plReader));      }      plrStep(&plReader);    }    if( match ) plwDlwAdd(&plWriter, &dlWriter);    plrDestroy(&plReader);    dlrStep(&dlReader);  }  plwDestroy(&plWriter);  dlwDestroy(&dlWriter);  dlrDestroy(&dlReader);}/* Used by docListMerge() to keep doclists in the ascending order by** docid, then ascending order by age (so the newest comes first).*/typedef struct OrderedDLReader {  DLReader *pReader;  /* TODO(shess) If we assume that docListMerge pReaders is ordered by  ** age (which we do), then we could use pReader comparisons to break  ** ties.  */  int idx;} OrderedDLReader;/* Order eof to end, then by docid asc, idx desc. */static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){  if( dlrAtEnd(r1->pReader) ){    if( dlrAtEnd(r2->pReader) ) return 0;  /* Both atEnd(). */    return 1;                              /* Only r1 atEnd(). */  }  if( dlrAtEnd(r2->pReader) ) return -1;   /* Only r2 atEnd(). */  if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1;  if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1;  /* Descending on idx. */  return r2->idx-r1->idx;}/* Bubble p[0] to appropriate place in p[1..n-1].  Assumes that** p[1..n-1] is already sorted.*//* TODO(shess) Is this frequent enough to warrant a binary search?** Before implementing that, instrument the code to check.  In most** current usage, I expect that p[0] will be less than p[1] a very** high proportion of the time.*/static void orderedDLReaderReorder(OrderedDLReader *p, int n){  while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){    OrderedDLReader tmp = p[0];    p[0] = p[1];    p[1] = tmp;    n--;    p++;  }}/* Given an array of doclist readers, merge their doclist elements** into out in sorted order (by docid), dropping elements from older** readers when there is a duplicate docid.  pReaders is assumed to be** ordered by age, oldest first.*//* TODO(shess) nReaders must be <= MERGE_COUNT.  This should probably** be fixed.*/static void docListMerge(DataBuffer *out,                         DLReader *pReaders, int nReaders){  OrderedDLReader readers[MERGE_COUNT];  DLWriter writer;  int i, n;  const char *pStart = 0;  int nStart = 0;  sqlite_int64 iFirstDocid = 0, iLastDocid = 0;  assert( nReaders>0 );  if( nReaders==1 ){    dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders));    return;  }  assert( nReaders<=MERGE_COUNT );  n = 0;  for(i=0; i<nReaders; i++){    assert( pReaders[i].iType==pReaders[0].iType );    readers[i].pReader = pReaders+i;    readers[i].idx = i;    n += dlrAllDataBytes(&pReaders[i]);  }  /* Conservatively size output to sum of inputs.  Output should end  ** up strictly smaller than input.  */  dataBufferExpand(out, n);  /* Get the readers into sorted order. */  while( i-->0 ){    orderedDLReaderReorder(readers+i, nReaders-i);  }  dlwInit(&writer, pReaders[0].iType, out);  while( !dlrAtEnd(readers[0].pReader) ){    sqlite_int64 iDocid = dlrDocid(readers[0].pReader);    /* If this is a continuation of the current buffer to copy, extend    ** that buffer.  memcpy() seems to be more efficient if it has a    ** lots of data to copy.    */    if( dlrDocData(readers[0].pReader)==pStart+nStart ){      nStart += dlrDocDataBytes(readers[0].pReader);    }else{      if( pStart!=0 ){        dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);      }      pStart = dlrDocData(readers[0].pReader);      nStart = dlrDocDataBytes(readers[0].pReader);      iFirstDocid = iDocid;    }    iLastDocid = iDocid;    dlrStep(readers[0].pReader);    /* Drop all of the older elements with the same docid. */    for(i=1; i<nReaders &&             !dlrAtEnd(readers[i].pReader) &&             dlrDocid(readers[i].pReader)==iDocid; i++){      dlrStep(readers[i].pReader);    }    /* Get the readers back into order. */    while( i-->0 ){      orderedDLReaderReorder(readers+i, nReaders-i);    }  }  /* Copy over any remaining elements. */  if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);  dlwDestroy(&writer);}/* pLeft and pRight are DLReaders positioned to the same docid.**** If there are no instances in pLeft or pRight where the position** of pLeft is one less than the position of pRight, then this** routine adds nothing to pOut.**** If there are one or more instances where positions from pLeft** are exactly one less than positions from pRight, then add a new** document record to pOut.  If pOut wants to hold positions, then** include the positions from pRight that are one more than a** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.*/static void mergePosList(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){  PLReader left, right;  PLWriter writer;  int match = 0;  assert( dlrDocid(pLeft)==dlrDocid(pRight) );  assert( pOut->iType!=DL_POSITIONS_OFFSETS );  plrInit(&left, pLeft->iType, dlrPosData(pLeft), dlrPosDataLen(pLeft));  plrInit(&right, pRight->iType, dlrPosData(pRight), dlrPosDataLen(pRight));  plwInit(&writer, dlrDocid(pLeft), pOut->iType);  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){    if( plrColumn(&left)<plrColumn(&right) ){      plrStep(&left);    }else if( plrColumn(&left)>plrColumn(&right) ){      plrStep(&right);    }else if( plrPosition(&left)+1<plrPosition(&right) ){      plrStep(&left);    }else if( plrPosition(&left)+1>plrPosition(&right) ){      plrStep(&right);    }else{      match = 1;      plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);      plrStep(&left);      plrStep(&right);    }  }  /* TODO(shess) We could remember the output position, encode the  ** docid, then encode the poslist directly into the output.  If no  ** match, we back out to the stored output position.  This would  ** also reduce the malloc count.  */  if( match ) plwDlwAdd(&writer, pOut);  plrDestroy(&left);  plrDestroy(&right);  plwDestroy(&writer);}/* We have two doclists with positions:  pLeft and pRight.** Write the phrase intersection of these two doclists into pOut.**** A phrase intersection means that two documents only match** if pLeft.iPos+1==pRight.iPos.**** iType controls the type of data written to pOut.  If iType is** DL_POSITIONS, the positions are those from pRight.*/static void docListPhraseMerge(  const char *pLeft, int nLeft,  const char *pRight, int nRight,  DocListType iType,
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -