📄 fts3.c
字号:
** readers when there is a duplicate docid. pReaders is assumed to be** ordered by age, oldest first.*//* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably** be fixed.*/static void docListMerge(DataBuffer *out, DLReader *pReaders, int nReaders){ OrderedDLReader readers[MERGE_COUNT]; DLWriter writer; int i, n; const char *pStart = 0; int nStart = 0; sqlite_int64 iFirstDocid = 0, iLastDocid = 0; assert( nReaders>0 ); if( nReaders==1 ){ dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); return; } assert( nReaders<=MERGE_COUNT ); n = 0; for(i=0; i<nReaders; i++){ assert( pReaders[i].iType==pReaders[0].iType ); readers[i].pReader = pReaders+i; readers[i].idx = i; n += dlrAllDataBytes(&pReaders[i]); } /* Conservatively size output to sum of inputs. Output should end ** up strictly smaller than input. */ dataBufferExpand(out, n); /* Get the readers into sorted order. */ while( i-->0 ){ orderedDLReaderReorder(readers+i, nReaders-i); } dlwInit(&writer, pReaders[0].iType, out); while( !dlrAtEnd(readers[0].pReader) ){ sqlite_int64 iDocid = dlrDocid(readers[0].pReader); /* If this is a continuation of the current buffer to copy, extend ** that buffer. memcpy() seems to be more efficient if it has a ** lots of data to copy. */ if( dlrDocData(readers[0].pReader)==pStart+nStart ){ nStart += dlrDocDataBytes(readers[0].pReader); }else{ if( pStart!=0 ){ dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); } pStart = dlrDocData(readers[0].pReader); nStart = dlrDocDataBytes(readers[0].pReader); iFirstDocid = iDocid; } iLastDocid = iDocid; dlrStep(readers[0].pReader); /* Drop all of the older elements with the same docid. */ for(i=1; i<nReaders && !dlrAtEnd(readers[i].pReader) && dlrDocid(readers[i].pReader)==iDocid; i++){ dlrStep(readers[i].pReader); } /* Get the readers back into order. */ while( i-->0 ){ orderedDLReaderReorder(readers+i, nReaders-i); } } /* Copy over any remaining elements. */ if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); dlwDestroy(&writer);}/* Helper function for posListUnion(). Compares the current position** between left and right, returning as standard C idiom of <0 if** left<right, >0 if left>right, and 0 if left==right. "End" always** compares greater.*/static int posListCmp(PLReader *pLeft, PLReader *pRight){ assert( pLeft->iType==pRight->iType ); if( pLeft->iType==DL_DOCIDS ) return 0; if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1; if( plrAtEnd(pRight) ) return -1; if( plrColumn(pLeft)<plrColumn(pRight) ) return -1; if( plrColumn(pLeft)>plrColumn(pRight) ) return 1; if( plrPosition(pLeft)<plrPosition(pRight) ) return -1; if( plrPosition(pLeft)>plrPosition(pRight) ) return 1; if( pLeft->iType==DL_POSITIONS ) return 0; if( plrStartOffset(pLeft)<plrStartOffset(pRight) ) return -1; if( plrStartOffset(pLeft)>plrStartOffset(pRight) ) return 1; if( plrEndOffset(pLeft)<plrEndOffset(pRight) ) return -1; if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; return 0;}/* Write the union of position lists in pLeft and pRight to pOut.** "Union" in this case meaning "All unique position tuples". Should** work with any doclist type, though both inputs and the output** should be the same type.*/static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ PLReader left, right; PLWriter writer; assert( dlrDocid(pLeft)==dlrDocid(pRight) ); assert( pLeft->iType==pRight->iType ); assert( pLeft->iType==pOut->iType ); plrInit(&left, pLeft); plrInit(&right, pRight); plwInit(&writer, pOut, dlrDocid(pLeft)); while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ int c = posListCmp(&left, &right); if( c<0 ){ plwCopy(&writer, &left); plrStep(&left); }else if( c>0 ){ plwCopy(&writer, &right); plrStep(&right); }else{ plwCopy(&writer, &left); plrStep(&left); plrStep(&right); } } plwTerminate(&writer); plwDestroy(&writer); plrDestroy(&left); plrDestroy(&right);}/* Write the union of doclists in pLeft and pRight to pOut. For** docids in common between the inputs, the union of the position** lists is written. Inputs and outputs are always type DL_DEFAULT.*/static void docListUnion( const char *pLeft, int nLeft, const char *pRight, int nRight, DataBuffer *pOut /* Write the combined doclist here */){ DLReader left, right; DLWriter writer; if( nLeft==0 ){ if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); return; } if( nRight==0 ){ dataBufferAppend(pOut, pLeft, nLeft); return; } dlrInit(&left, DL_DEFAULT, pLeft, nLeft); dlrInit(&right, DL_DEFAULT, pRight, nRight); dlwInit(&writer, DL_DEFAULT, pOut); while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ if( dlrAtEnd(&right) ){ dlwCopy(&writer, &left); dlrStep(&left); }else if( dlrAtEnd(&left) ){ dlwCopy(&writer, &right); dlrStep(&right); }else if( dlrDocid(&left)<dlrDocid(&right) ){ dlwCopy(&writer, &left); dlrStep(&left); }else if( dlrDocid(&left)>dlrDocid(&right) ){ dlwCopy(&writer, &right); dlrStep(&right); }else{ posListUnion(&left, &right, &writer); dlrStep(&left); dlrStep(&right); } } dlrDestroy(&left); dlrDestroy(&right); dlwDestroy(&writer);}/* ** This function is used as part of the implementation of phrase and** NEAR matching.**** pLeft and pRight are DLReaders positioned to the same docid in** lists of type DL_POSITION. This function writes an entry to the** DLWriter pOut for each position in pRight that is less than** (nNear+1) greater (but not equal to or smaller) than a position ** in pLeft. For example, if nNear is 0, and the positions contained** by pLeft and pRight are:**** pLeft: 5 10 15 20** pRight: 6 9 17 21**** then the docid is added to pOut. If pOut is of type DL_POSITIONS,** then a positionids "6" and "21" are also added to pOut.**** If boolean argument isSaveLeft is true, then positionids are copied** from pLeft instead of pRight. In the example above, the positions "5"** and "20" would be added instead of "6" and "21".*/static void posListPhraseMerge( DLReader *pLeft, DLReader *pRight, int nNear, int isSaveLeft, DLWriter *pOut){ PLReader left, right; PLWriter writer; int match = 0; assert( dlrDocid(pLeft)==dlrDocid(pRight) ); assert( pOut->iType!=DL_POSITIONS_OFFSETS ); plrInit(&left, pLeft); plrInit(&right, pRight); while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ if( plrColumn(&left)<plrColumn(&right) ){ plrStep(&left); }else if( plrColumn(&left)>plrColumn(&right) ){ plrStep(&right); }else if( plrPosition(&left)>=plrPosition(&right) ){ plrStep(&right); }else{ if( (plrPosition(&right)-plrPosition(&left))<=(nNear+1) ){ if( !match ){ plwInit(&writer, pOut, dlrDocid(pLeft)); match = 1; } if( !isSaveLeft ){ plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); }else{ plwAdd(&writer, plrColumn(&left), plrPosition(&left), 0, 0); } plrStep(&right); }else{ plrStep(&left); } } } if( match ){ plwTerminate(&writer); plwDestroy(&writer); } plrDestroy(&left); plrDestroy(&right);}/*** Compare the values pointed to by the PLReaders passed as arguments. ** Return -1 if the value pointed to by pLeft is considered less than** the value pointed to by pRight, +1 if it is considered greater** than it, or 0 if it is equal. i.e.**** (*pLeft - *pRight)**** A PLReader that is in the EOF condition is considered greater than** any other. If neither argument is in EOF state, the return value of** plrColumn() is used. If the plrColumn() values are equal, the** comparison is on the basis of plrPosition().*/static int plrCompare(PLReader *pLeft, PLReader *pRight){ assert(!plrAtEnd(pLeft) || !plrAtEnd(pRight)); if( plrAtEnd(pRight) || plrAtEnd(pLeft) ){ return (plrAtEnd(pRight) ? -1 : 1); } if( plrColumn(pLeft)!=plrColumn(pRight) ){ return ((plrColumn(pLeft)<plrColumn(pRight)) ? -1 : 1); } if( plrPosition(pLeft)!=plrPosition(pRight) ){ return ((plrPosition(pLeft)<plrPosition(pRight)) ? -1 : 1); } return 0;}/* We have two doclists with positions: pLeft and pRight. Depending** on the value of the nNear parameter, perform either a phrase** intersection (if nNear==0) or a NEAR intersection (if nNear>0)** and write the results into pOut.**** A phrase intersection means that two documents only match** if pLeft.iPos+1==pRight.iPos.**** A NEAR intersection means that two documents only match if ** (abs(pLeft.iPos-pRight.iPos)<nNear).**** If a NEAR intersection is requested, then the nPhrase argument should** be passed the number of tokens in the two operands to the NEAR operator** combined. For example:**** Query syntax nPhrase** ------------------------------------** "A B C" NEAR "D E" 5** A NEAR B 2**** iType controls the type of data written to pOut. If iType is** DL_POSITIONS, the positions are those from pRight.*/static void docListPhraseMerge( const char *pLeft, int nLeft, const char *pRight, int nRight, int nNear, /* 0 for a phrase merge, non-zero for a NEAR merge */ int nPhrase, /* Number of tokens in left+right operands to NEAR */ DocListType iType, /* Type of doclist to write to pOut */ DataBuffer *pOut /* Write the combined doclist here */){ DLReader left, right; DLWriter writer; if( nLeft==0 || nRight==0 ) return; assert( iType!=DL_POSITIONS_OFFSETS ); dlrInit(&left, DL_POSITIONS, pLeft, nLeft); dlrInit(&right, DL_POSITIONS, pRight, nRight); dlwInit(&writer, iType, pOut); while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ if( dlrDocid(&left)<dlrDocid(&right) ){ dlrStep(&left); }else if( dlrDocid(&right)<dlrDocid(&left) ){ dlrStep(&right); }else{ if( nNear==0 ){ posListPhraseMerge(&left, &right, 0, 0, &writer); }else{ /* This case occurs when two terms (simple terms or phrases) are * connected by a NEAR operator, span (nNear+1). i.e. * * '"terrible company" NEAR widget' */ DataBuffer one = {0, 0, 0}; DataBuffer two = {0, 0, 0}; DLWriter dlwriter2; DLReader dr1 = {0, 0, 0, 0, 0}; DLReader dr2 = {0, 0, 0, 0, 0}; dlwInit(&dlwriter2, iType, &one); posListPhraseMerge(&right, &left, nNear-3+nPhrase, 1, &dlwriter2); dlwInit(&dlwriter2, iType, &two); posListPhraseMerge(&left, &right, nNear-1, 0, &dlwriter2); if( one.nData) dlrInit(&dr1, iType, one.pData, one.nData); if( two.nData) dlrInit(&dr2, iType, two.pData, two.nData); if( !dlrAtEnd(&dr1) || !dlrAtEnd(&dr2) ){ PLReader pr1 = {0}; PLReader pr2 = {0}; PLWriter plwriter; plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1)); if( one.nData ) plrInit(&pr1, &dr1); if( two.nData ) plrInit(&pr2, &dr2); while( !plrAtEnd(&pr1) || !plrAtEnd(&pr2) ){ int iCompare = plrCompare(&pr1, &pr2); switch( iCompare ){ case -1: plwCopy(&plwriter, &pr1); plrStep(&pr1); break; case 1: plwCopy(&plwriter, &pr2); plrStep(&pr2); break; case 0: plwCopy(&plwriter, &pr1); plrStep(&pr1); plrStep(&pr2); break; } } plwTerminate(&plwriter); } dataBufferDestroy(&one);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -