📄 fts2.c
字号:
}while( vu!=0 ); q[-1] &= 0x7f; /* turn off high bit in final byte */ assert( q - (unsigned char *)p <= VARINT_MAX ); return (int) (q - (unsigned char *)p);}/* Read a 64-bit variable-length integer from memory starting at p[0]. * Return the number of bytes read, or 0 on error. * The value is stored in *v. */static int getVarint(const char *p, sqlite_int64 *v){ const unsigned char *q = (const unsigned char *) p; sqlite_uint64 x = 0, y = 1; while( (*q & 0x80) == 0x80 ){ x += y * (*q++ & 0x7f); y <<= 7; if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ assert( 0 ); return 0; } } x += y * (*q++); *v = (sqlite_int64) x; return (int) (q - (unsigned char *)p);}static int getVarint32(const char *p, int *pi){ sqlite_int64 i; int ret = getVarint(p, &i); *pi = (int) i; assert( *pi==i ); return ret;}/*******************************************************************//* DataBuffer is used to collect data into a buffer in piecemeal** fashion. It implements the usual distinction between amount of** data currently stored (nData) and buffer capacity (nCapacity).**** dataBufferInit - create a buffer with given initial capacity.** dataBufferReset - forget buffer's data, retaining capacity.** dataBufferDestroy - free buffer's data.** dataBufferExpand - expand capacity without adding data.** dataBufferAppend - append data.** dataBufferAppend2 - append two pieces of data at once.** dataBufferReplace - replace buffer's data.*/typedef struct DataBuffer { char *pData; /* Pointer to malloc'ed buffer. */ int nCapacity; /* Size of pData buffer. */ int nData; /* End of data loaded into pData. */} DataBuffer;static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){ assert( nCapacity>=0 ); pBuffer->nData = 0; pBuffer->nCapacity = nCapacity; pBuffer->pData = nCapacity==0 ? NULL : malloc(nCapacity);}static void dataBufferReset(DataBuffer *pBuffer){ pBuffer->nData = 0;}static void dataBufferDestroy(DataBuffer *pBuffer){ if( pBuffer->pData!=NULL ) free(pBuffer->pData); SCRAMBLE(pBuffer);}static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ assert( nAddCapacity>0 ); /* TODO(shess) Consider expanding more aggressively. Note that the ** underlying malloc implementation may take care of such things for ** us already. */ if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){ pBuffer->nCapacity = pBuffer->nData+nAddCapacity; pBuffer->pData = realloc(pBuffer->pData, pBuffer->nCapacity); }}static void dataBufferAppend(DataBuffer *pBuffer, const char *pSource, int nSource){ assert( nSource>0 && pSource!=NULL ); dataBufferExpand(pBuffer, nSource); memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource); pBuffer->nData += nSource;}static void dataBufferAppend2(DataBuffer *pBuffer, const char *pSource1, int nSource1, const char *pSource2, int nSource2){ assert( nSource1>0 && pSource1!=NULL ); assert( nSource2>0 && pSource2!=NULL ); dataBufferExpand(pBuffer, nSource1+nSource2); memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); pBuffer->nData += nSource1+nSource2;}static void dataBufferReplace(DataBuffer *pBuffer, const char *pSource, int nSource){ dataBufferReset(pBuffer); dataBufferAppend(pBuffer, pSource, nSource);}/* StringBuffer is a null-terminated version of DataBuffer. */typedef struct StringBuffer { DataBuffer b; /* Includes null terminator. */} StringBuffer;static void initStringBuffer(StringBuffer *sb){ dataBufferInit(&sb->b, 100); dataBufferReplace(&sb->b, "", 1);}static int stringBufferLength(StringBuffer *sb){ return sb->b.nData-1;}static char *stringBufferData(StringBuffer *sb){ return sb->b.pData;}static void stringBufferDestroy(StringBuffer *sb){ dataBufferDestroy(&sb->b);}static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ assert( sb->b.nData>0 ); if( nFrom>0 ){ sb->b.nData--; dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1); }}static void append(StringBuffer *sb, const char *zFrom){ nappend(sb, zFrom, strlen(zFrom));}/* Append a list of strings separated by commas. */static void appendList(StringBuffer *sb, int nString, char **azString){ int i; for(i=0; i<nString; ++i){ if( i>0 ) append(sb, ", "); append(sb, azString[i]); }}static int endsInWhiteSpace(StringBuffer *p){ return stringBufferLength(p)>0 && safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]);}/* If the StringBuffer ends in something other than white space, add a** single space character to the end.*/static void appendWhiteSpace(StringBuffer *p){ if( stringBufferLength(p)==0 ) return; if( !endsInWhiteSpace(p) ) append(p, " ");}/* Remove white space from the end of the StringBuffer */static void trimWhiteSpace(StringBuffer *p){ while( endsInWhiteSpace(p) ){ p->b.pData[--p->b.nData-1] = '\0'; }}/*******************************************************************//* DLReader is used to read document elements from a doclist. The** current docid is cached, so dlrDocid() is fast. DLReader does not** own the doclist buffer.**** dlrAtEnd - true if there's no more data to read.** dlrDocid - docid of current document.** dlrDocData - doclist data for current document (including docid).** dlrDocDataBytes - length of same.** dlrAllDataBytes - length of all remaining data.** dlrPosData - position data for current document.** dlrPosDataLen - length of pos data for current document (incl POS_END).** dlrStep - step to current document.** dlrInit - initial for doclist of given type against given data.** dlrDestroy - clean up.**** Expected usage is something like:**** DLReader reader;** dlrInit(&reader, pData, nData);** while( !dlrAtEnd(&reader) ){** // calls to dlrDocid() and kin.** dlrStep(&reader);** }** dlrDestroy(&reader);*/typedef struct DLReader { DocListType iType; const char *pData; int nData; sqlite_int64 iDocid; int nElement;} DLReader;static int dlrAtEnd(DLReader *pReader){ assert( pReader->nData>=0 ); return pReader->nData==0;}static sqlite_int64 dlrDocid(DLReader *pReader){ assert( !dlrAtEnd(pReader) ); return pReader->iDocid;}static const char *dlrDocData(DLReader *pReader){ assert( !dlrAtEnd(pReader) ); return pReader->pData;}static int dlrDocDataBytes(DLReader *pReader){ assert( !dlrAtEnd(pReader) ); return pReader->nElement;}static int dlrAllDataBytes(DLReader *pReader){ assert( !dlrAtEnd(pReader) ); return pReader->nData;}/* TODO(shess) Consider adding a field to track iDocid varint length** to make these two functions faster. This might matter (a tiny bit)** for queries.*/static const char *dlrPosData(DLReader *pReader){ sqlite_int64 iDummy; int n = getVarint(pReader->pData, &iDummy); assert( !dlrAtEnd(pReader) ); return pReader->pData+n;}static int dlrPosDataLen(DLReader *pReader){ sqlite_int64 iDummy; int n = getVarint(pReader->pData, &iDummy); assert( !dlrAtEnd(pReader) ); return pReader->nElement-n;}static void dlrStep(DLReader *pReader){ assert( !dlrAtEnd(pReader) ); /* Skip past current doclist element. */ assert( pReader->nElement<=pReader->nData ); pReader->pData += pReader->nElement; pReader->nData -= pReader->nElement; /* If there is more data, read the next doclist element. */ if( pReader->nData!=0 ){ sqlite_int64 iDocidDelta; int iDummy, n = getVarint(pReader->pData, &iDocidDelta); pReader->iDocid += iDocidDelta; if( pReader->iType>=DL_POSITIONS ){ assert( n<pReader->nData ); while( 1 ){ n += getVarint32(pReader->pData+n, &iDummy); assert( n<=pReader->nData ); if( iDummy==POS_END ) break; if( iDummy==POS_COLUMN ){ n += getVarint32(pReader->pData+n, &iDummy); assert( n<pReader->nData ); }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ n += getVarint32(pReader->pData+n, &iDummy); n += getVarint32(pReader->pData+n, &iDummy); assert( n<pReader->nData ); } } } pReader->nElement = n; assert( pReader->nElement<=pReader->nData ); }}static void dlrInit(DLReader *pReader, DocListType iType, const char *pData, int nData){ assert( pData!=NULL && nData!=0 ); pReader->iType = iType; pReader->pData = pData; pReader->nData = nData; pReader->nElement = 0; pReader->iDocid = 0; /* Load the first element's data. There must be a first element. */ dlrStep(pReader);}static void dlrDestroy(DLReader *pReader){ SCRAMBLE(pReader);}#ifndef NDEBUG/* Verify that the doclist can be validly decoded. Also returns the** last docid found because it's convenient in other assertions for** DLWriter.*/static void docListValidate(DocListType iType, const char *pData, int nData, sqlite_int64 *pLastDocid){ sqlite_int64 iPrevDocid = 0; assert( nData>0 ); assert( pData!=0 ); assert( pData+nData>pData ); while( nData!=0 ){ sqlite_int64 iDocidDelta; int n = getVarint(pData, &iDocidDelta); iPrevDocid += iDocidDelta; if( iType>DL_DOCIDS ){ int iDummy; while( 1 ){ n += getVarint32(pData+n, &iDummy); if( iDummy==POS_END ) break; if( iDummy==POS_COLUMN ){ n += getVarint32(pData+n, &iDummy); }else if( iType>DL_POSITIONS ){ n += getVarint32(pData+n, &iDummy); n += getVarint32(pData+n, &iDummy); } assert( n<=nData ); } } assert( n<=nData ); pData += n; nData -= n; } if( pLastDocid ) *pLastDocid = iPrevDocid;}#define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o)#else#define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 )#endif/*******************************************************************//* DLWriter is used to write doclist data to a DataBuffer. DLWriter** always appends to the buffer and does not own it.**** dlwInit - initialize to write a given type doclistto a buffer.** dlwDestroy - clear the writer's memory. Does not free buffer.** dlwAppend - append raw doclist data to buffer.** dlwCopy - copy next doclist from reader to writer.** dlwAdd - construct doclist element and append to buffer.** Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).*/typedef struct DLWriter { DocListType iType; DataBuffer *b; sqlite_int64 iPrevDocid;#ifndef NDEBUG int has_iPrevDocid;#endif} DLWriter;static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){ pWriter->b = b; pWriter->iType = iType; pWriter->iPrevDocid = 0;#ifndef NDEBUG pWriter->has_iPrevDocid = 0;#endif}static void dlwDestroy(DLWriter *pWriter){ SCRAMBLE(pWriter);}/* iFirstDocid is the first docid in the doclist in pData. It is** needed because pData may point within a larger doclist, in which** case the first item would be delta-encoded.**** iLastDocid is the final docid in the doclist in pData. It is** needed to create the new iPrevDocid for future delta-encoding. The** code could decode the passed doclist to recreate iLastDocid, but** the only current user (docListMerge) already has decoded this** information.*//* TODO(shess) This has become just a helper for docListMerge.** Consider a refactor to make this cleaner.*/static void dlwAppend(DLWriter *pWriter, const char *pData, int nData, sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ sqlite_int64 iDocid = 0; char c[VARINT_MAX]; int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */#ifndef NDEBUG sqlite_int64 iLastDocidDelta;#endif /* Recode the initial docid as delta from iPrevDocid. */ nFirstOld = getVarint(pData, &iDocid); assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); /* Verify that the incoming doclist is valid AND that it ends with ** the expected docid. This is essential because we'll trust this ** docid in future delta-encoding. */ ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); /* Append recoded initial docid and everything else. Rest of docids ** should have been delta-encoded from previous initial docid. */ if( nFirstOld<nData ){ dataBufferAppend2(pWriter->b, c, nFirstNew, pData+nFirstOld, nData-nFirstOld); }else{ dataBufferAppend(pWriter->b, c, nFirstNew); } pWriter->iPrevDocid = iLastDocid;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -