📄 fts2.c
字号:
int iEndOffset; /* the last end offset read */} PLReader;static int plrAtEnd(PLReader *pReader){ return pReader->pData==NULL;}static int plrColumn(PLReader *pReader){ assert( !plrAtEnd(pReader) ); return pReader->iColumn;}static int plrPosition(PLReader *pReader){ assert( !plrAtEnd(pReader) ); return pReader->iPosition;}static int plrStartOffset(PLReader *pReader){ assert( !plrAtEnd(pReader) ); return pReader->iStartOffset;}static int plrEndOffset(PLReader *pReader){ assert( !plrAtEnd(pReader) ); return pReader->iEndOffset;}static void plrStep(PLReader *pReader){ int i, n; assert( !plrAtEnd(pReader) ); if( pReader->nData==0 ){ pReader->pData = NULL; return; } n = getVarint32(pReader->pData, &i); if( i==POS_COLUMN ){ n += getVarint32(pReader->pData+n, &pReader->iColumn); pReader->iPosition = 0; pReader->iStartOffset = 0; n += getVarint32(pReader->pData+n, &i); } /* Should never see adjacent column changes. */ assert( i!=POS_COLUMN ); if( i==POS_END ){ pReader->nData = 0; pReader->pData = NULL; return; } pReader->iPosition += i-POS_BASE; if( pReader->iType==DL_POSITIONS_OFFSETS ){ n += getVarint32(pReader->pData+n, &i); pReader->iStartOffset += i; n += getVarint32(pReader->pData+n, &i); pReader->iEndOffset = pReader->iStartOffset+i; } assert( n<=pReader->nData ); pReader->pData += n; pReader->nData -= n;}static void plrInit(PLReader *pReader, DocListType iType, const char *pData, int nData){ pReader->pData = pData; pReader->nData = nData; pReader->iType = iType; pReader->iColumn = 0; pReader->iPosition = 0; pReader->iStartOffset = 0; pReader->iEndOffset = 0; plrStep(pReader);}static void plrDestroy(PLReader *pReader){ SCRAMBLE(pReader);}/*******************************************************************//* PLWriter is used in constructing a document's position list. As a** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.**** plwInit - init for writing a document's poslist.** plwReset - reset the writer for a new document.** plwDestroy - clear a writer.** plwNew - malloc storage and initialize it.** plwDelete - clear and free storage.** plwDlwAdd - append the docid and poslist to a doclist writer.** plwAdd - append position and offset information.*//* TODO(shess) PLWriter is used in two ways. fulltextUpdate() uses it** in construction of a new doclist. docListTrim() and mergePosList()** use it when trimming. In the former case, it wants to own the** DataBuffer, in the latter it's possible it could encode into a** pre-existing DataBuffer.*/typedef struct PLWriter { DataBuffer b; sqlite_int64 iDocid; DocListType iType; int iColumn; /* the last column written */ int iPos; /* the last position written */ int iOffset; /* the last start offset written */} PLWriter;static void plwDlwAdd(PLWriter *pWriter, DLWriter *dlWriter){ dlwAdd(dlWriter, pWriter->iDocid, pWriter->b.pData, pWriter->b.nData);}static void plwAdd(PLWriter *pWriter, int iColumn, int iPos, int iStartOffset, int iEndOffset){ /* Worst-case space for POS_COLUMN, iColumn, iPosDelta, ** iStartOffsetDelta, and iEndOffsetDelta. */ char c[5*VARINT_MAX]; int n = 0; if( pWriter->iType==DL_DOCIDS ) return; if( iColumn!=pWriter->iColumn ){ n += putVarint(c+n, POS_COLUMN); n += putVarint(c+n, iColumn); pWriter->iColumn = iColumn; pWriter->iPos = 0; pWriter->iOffset = 0; } assert( iPos>=pWriter->iPos ); n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos)); pWriter->iPos = iPos; if( pWriter->iType==DL_POSITIONS_OFFSETS ){ assert( iStartOffset>=pWriter->iOffset ); n += putVarint(c+n, iStartOffset-pWriter->iOffset); pWriter->iOffset = iStartOffset; assert( iEndOffset>=iStartOffset ); n += putVarint(c+n, iEndOffset-iStartOffset); } dataBufferAppend(&pWriter->b, c, n);}static void plwReset(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){ dataBufferReset(&pWriter->b); pWriter->iDocid = iDocid; pWriter->iType = iType; pWriter->iColumn = 0; pWriter->iPos = 0; pWriter->iOffset = 0;}static void plwInit(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){ dataBufferInit(&pWriter->b, 0); plwReset(pWriter, iDocid, iType);}static PLWriter *plwNew(sqlite_int64 iDocid, DocListType iType){ PLWriter *pWriter = malloc(sizeof(PLWriter)); plwInit(pWriter, iDocid, iType); return pWriter;}static void plwDestroy(PLWriter *pWriter){ dataBufferDestroy(&pWriter->b); SCRAMBLE(pWriter);}static void plwDelete(PLWriter *pWriter){ plwDestroy(pWriter); free(pWriter);}/* Copy the doclist data of iType in pData/nData into *out, trimming** unnecessary data as we go. Only columns matching iColumn are** copied, all columns copied if iColimn is -1. Elements with no** matching columns are dropped. The output is an iOutType doclist.*/static void docListTrim(DocListType iType, const char *pData, int nData, int iColumn, DocListType iOutType, DataBuffer *out){ DLReader dlReader; DLWriter dlWriter; PLWriter plWriter; assert( iOutType<=iType ); dlrInit(&dlReader, iType, pData, nData); dlwInit(&dlWriter, iOutType, out); plwInit(&plWriter, 0, iOutType); while( !dlrAtEnd(&dlReader) ){ PLReader plReader; int match = 0; plrInit(&plReader, dlReader.iType, dlrPosData(&dlReader), dlrPosDataLen(&dlReader)); plwReset(&plWriter, dlrDocid(&dlReader), iOutType); while( !plrAtEnd(&plReader) ){ if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ match = 1; plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), plrStartOffset(&plReader), plrEndOffset(&plReader)); } plrStep(&plReader); } if( match ) plwDlwAdd(&plWriter, &dlWriter); plrDestroy(&plReader); dlrStep(&dlReader); } plwDestroy(&plWriter); dlwDestroy(&dlWriter); dlrDestroy(&dlReader);}/* Used by docListMerge() to keep doclists in the ascending order by** docid, then ascending order by age (so the newest comes first).*/typedef struct OrderedDLReader { DLReader *pReader; /* TODO(shess) If we assume that docListMerge pReaders is ordered by ** age (which we do), then we could use pReader comparisons to break ** ties. */ int idx;} OrderedDLReader;/* Order eof to end, then by docid asc, idx desc. */static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){ if( dlrAtEnd(r1->pReader) ){ if( dlrAtEnd(r2->pReader) ) return 0; /* Both atEnd(). */ return 1; /* Only r1 atEnd(). */ } if( dlrAtEnd(r2->pReader) ) return -1; /* Only r2 atEnd(). */ if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1; if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1; /* Descending on idx. */ return r2->idx-r1->idx;}/* Bubble p[0] to appropriate place in p[1..n-1]. Assumes that** p[1..n-1] is already sorted.*//* TODO(shess) Is this frequent enough to warrant a binary search?** Before implementing that, instrument the code to check. In most** current usage, I expect that p[0] will be less than p[1] a very** high proportion of the time.*/static void orderedDLReaderReorder(OrderedDLReader *p, int n){ while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){ OrderedDLReader tmp = p[0]; p[0] = p[1]; p[1] = tmp; n--; p++; }}/* Given an array of doclist readers, merge their doclist elements** into out in sorted order (by docid), dropping elements from older** readers when there is a duplicate docid. pReaders is assumed to be** ordered by age, oldest first.*//* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably** be fixed.*/static void docListMerge(DataBuffer *out, DLReader *pReaders, int nReaders){ OrderedDLReader readers[MERGE_COUNT]; DLWriter writer; int i, n; const char *pStart = 0; int nStart = 0; sqlite_int64 iFirstDocid = 0, iLastDocid = 0; assert( nReaders>0 ); if( nReaders==1 ){ dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); return; } assert( nReaders<=MERGE_COUNT ); n = 0; for(i=0; i<nReaders; i++){ assert( pReaders[i].iType==pReaders[0].iType ); readers[i].pReader = pReaders+i; readers[i].idx = i; n += dlrAllDataBytes(&pReaders[i]); } /* Conservatively size output to sum of inputs. Output should end ** up strictly smaller than input. */ dataBufferExpand(out, n); /* Get the readers into sorted order. */ while( i-->0 ){ orderedDLReaderReorder(readers+i, nReaders-i); } dlwInit(&writer, pReaders[0].iType, out); while( !dlrAtEnd(readers[0].pReader) ){ sqlite_int64 iDocid = dlrDocid(readers[0].pReader); /* If this is a continuation of the current buffer to copy, extend ** that buffer. memcpy() seems to be more efficient if it has a ** lots of data to copy. */ if( dlrDocData(readers[0].pReader)==pStart+nStart ){ nStart += dlrDocDataBytes(readers[0].pReader); }else{ if( pStart!=0 ){ dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); } pStart = dlrDocData(readers[0].pReader); nStart = dlrDocDataBytes(readers[0].pReader); iFirstDocid = iDocid; } iLastDocid = iDocid; dlrStep(readers[0].pReader); /* Drop all of the older elements with the same docid. */ for(i=1; i<nReaders && !dlrAtEnd(readers[i].pReader) && dlrDocid(readers[i].pReader)==iDocid; i++){ dlrStep(readers[i].pReader); } /* Get the readers back into order. */ while( i-->0 ){ orderedDLReaderReorder(readers+i, nReaders-i); } } /* Copy over any remaining elements. */ if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); dlwDestroy(&writer);}/* pLeft and pRight are DLReaders positioned to the same docid.**** If there are no instances in pLeft or pRight where the position** of pLeft is one less than the position of pRight, then this** routine adds nothing to pOut.**** If there are one or more instances where positions from pLeft** are exactly one less than positions from pRight, then add a new** document record to pOut. If pOut wants to hold positions, then** include the positions from pRight that are one more than a** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.*/static void mergePosList(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ PLReader left, right; PLWriter writer; int match = 0; assert( dlrDocid(pLeft)==dlrDocid(pRight) ); assert( pOut->iType!=DL_POSITIONS_OFFSETS ); plrInit(&left, pLeft->iType, dlrPosData(pLeft), dlrPosDataLen(pLeft)); plrInit(&right, pRight->iType, dlrPosData(pRight), dlrPosDataLen(pRight)); plwInit(&writer, dlrDocid(pLeft), pOut->iType); while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ if( plrColumn(&left)<plrColumn(&right) ){ plrStep(&left); }else if( plrColumn(&left)>plrColumn(&right) ){ plrStep(&right); }else if( plrPosition(&left)+1<plrPosition(&right) ){ plrStep(&left); }else if( plrPosition(&left)+1>plrPosition(&right) ){ plrStep(&right); }else{ match = 1; plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); plrStep(&left); plrStep(&right); } } /* TODO(shess) We could remember the output position, encode the ** docid, then encode the poslist directly into the output. If no ** match, we back out to the stored output position. This would ** also reduce the malloc count. */ if( match ) plwDlwAdd(&writer, pOut); plrDestroy(&left); plrDestroy(&right); plwDestroy(&writer);}/* We have two doclists with positions: pLeft and pRight.** Write the phrase intersection of these two doclists into pOut.**** A phrase intersection means that two documents only match** if pLeft.iPos+1==pRight.iPos.**** iType controls the type of data written to pOut. If iType is** DL_POSITIONS, the positions are those from pRight.*/static void docListPhraseMerge( const char *pLeft, int nLeft, const char *pRight, int nRight, DocListType iType,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -