📄 segmentinfo.php
字号:
*
     * Values:
     *
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
     *                       document numbers are compacted (shifted if segment has deleted documents)
     *
     * @var integer
     */
    private $_termsScanMode;

    /** Scan modes */
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
                                // document numbers are compacted (shifted if segment contains deleted documents)

    /**
     * Reset terms stream
     *
     * Reopens the term dictionary ('.tis'), validates its format marker, reads the
     * header values and positions the stream on the first term. For the
     * SM_FULL_INFO and SM_MERGE_INFO modes the '.frq' and '.prx' files are opened
     * as well and the document map is built for all non-deleted documents.
     *
     * $startId - id for the first document
     * $mode    - terms scan mode (one of the self::SM_* constants)
     *
     * Returns start document id for the next segment
     *
     * @param integer $startId
     * @param integer $mode
     * @throws Zend_Search_Lucene_Exception
     * @return integer
     */
    public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
    {
        // Release a previously opened dictionary handle before reopening it
        $this->_tisFile = null;

        $this->_tisFile = $this->openCompoundFile('.tis', false);
        $this->_tisFileOffset = $this->_tisFile->tell();

        $tiVersion = $this->_tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $this->_termCount     =
              $this->_termNum = $this->_tisFile->readLong(); // Read terms count
        $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
        $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // The value is not used by this method, but it has to be consumed
            // so that the file pointer ends up on the first term entry
            $this->_tisFile->readInt();
        }

        $this->_frqFile = null;
        $this->_prxFile = null;

        $this->_docMap = array();

        $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
        $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
        $this->_lastTermPositions = null;

        $this->_termsScanMode = $mode;

        switch ($mode) {
            case self::SM_TERMS_ONLY:
                // Do nothing
                break;

            case self::SM_FULL_INFO:
                // break intentionally omitted
            case self::SM_MERGE_INFO:
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_frqFileOffset = $this->_frqFile->tell();

                $this->_prxFile = $this->openCompoundFile('.prx', false);
                $this->_prxFileOffset = $this->_prxFile->tell();

                // Map every non-deleted document to its id in the resulting stream.
                // In merge mode ids are compacted (count of already mapped documents),
                // otherwise the original document number is kept.
                for ($count = 0; $count < $this->_docCount; $count++) {
                    if (!$this->isDeleted($count)) {
                        $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
                    }
                }
                break;

            default:
                throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
        }

        // Must be computed before nextTerm(): nextTerm() sets _docMap to null when
        // the segment has no terms, which would lose the number of mapped documents
        // (and makes count() fail on a non-countable value).
        $nextStartId = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);

        $this->nextTerm();

        return $nextStartId;
    }


    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term.
     *
     * The dictionary index is binary-searched for the closest entry that is not
     * greater than the prefix, the '.tis' file is positioned on that entry and the
     * stream is then scanned forward until the current term is equal to or greater
     * than the prefix. If the field is not present in the segment, or the prefix
     * is below the dictionary range, the stream is closed and the current term is
     * set to null.
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     * @throws Zend_Search_Lucene_Exception
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($prefix->field);

        if ($searchField == -1) {
            /**
             * Field is not presented in this segment
             * Go to the end of dictionary
             */
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // search for appropriate value in dictionary
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($prefix->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // We have reached term we are looking for.
                // Positioning below uses $highIndex, so it has to point at the
                // matched entry; otherwise the stream would start at a later
                // index entry and skip the terms in between.
                $highIndex = $mid;
                break;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }

        $prevPosition = $highIndex;
        $prevTerm     = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        if ($this->_tisFile === null) {
            // The end of terms stream is reached and terms dictionary file is closed
            // Perform mini-reset operation
            // NOTE(review): _docMap is not rebuilt here although nextTerm() sets it
            // to null at the end of the stream, so positions may be dropped for
            // SM_FULL_INFO/SM_MERGE_INFO after a mini-reset - confirm with callers.
            $this->_tisFile = $this->openCompoundFile('.tis', false);

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_prxFile = $this->openCompoundFile('.prx', false);
            }
        }
        $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);

        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                                 ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                     $prevTermInfo[1] /* freqPointer */,
                                                                     $prevTermInfo[2] /* proxPointer */,
                                                                     $prevTermInfo[3] /* skipOffset */);
        // Number of terms that are still to be read after the selected index entry
        $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;

        if ($highIndex == 0) {
            // skip start entry
            $this->nextTerm();
        } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
            // We got exact match in the dictionary index

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_lastTermPositions = array();

                $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
                $freqs = array();   $docId = 0;
                for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
                    $docDelta = $this->_frqFile->readVInt();
                    if( $docDelta % 2 == 1 ) {
                        // Odd value: frequency is 1 and is not stored separately
                        $docId += ($docDelta-1)/2;
                        $freqs[ $docId ] = 1;
                    } else {
                        // Even value: frequency follows as a separate VInt
                        $docId += $docDelta/2;
                        $freqs[ $docId ] = $this->_frqFile->readVInt();
                    }
                }

                $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
                foreach ($freqs as $docId => $freq) {
                    $termPosition = 0;  $positions = array();

                    // Positions are stored as deltas from the previous position
                    for ($count = 0; $count < $freq; $count++ ) {
                        $termPosition += $this->_prxFile->readVInt();
                        $positions[] = $termPosition;
                    }

                    // Documents missing from the map are skipped
                    if (isset($this->_docMap[$docId])) {
                        $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                    }
                }
            }

            return;
        }

        // Search term matching specified prefix
        while ($this->_lastTerm !== null) {
            if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
                 ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
                // Current term matches or is greater than the pattern
                return;
            }

            $this->nextTerm();
        }
    }


    /**
     * Scans terms dictionary and returns next term
     *
     * Reads the next entry from the '.tis' file and, for the SM_FULL_INFO and
     * SM_MERGE_INFO modes, loads term positions for the documents present in the
     * document map. When the stream is closed or exhausted, all stream state is
     * cleared and null is returned.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_tisFile === null  ||  $this->_termCount == 0) {
            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;
            $this->_docMap            = null;

            // may be necessary for "empty" segment
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            return null;
        }

        // Term text is stored as a shared prefix length plus a suffix
        $termPrefixLength = $this->_tisFile->readVInt();
        $termSuffix       = $this->_tisFile->readString();
        $termFieldNum     = $this->_tisFile->readVInt();
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $termPrefixLength) . $termSuffix;

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($termValue, $this->_fields[$termFieldNum]->name);

        // File pointers are stored as deltas from the previous term's pointers
        $docFreq     = $this->_tisFile->readVInt();
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
        if ($docFreq >= $this->_skipInterval) {
            $skipOffset = $this->_tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }

        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);


        if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
            $this->_lastTermPositions = array();

            $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
            $freqs = array();   $docId = 0;
            for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
                $docDelta = $this->_frqFile->readVInt();
                if( $docDelta % 2 == 1 ) {
                    // Odd value: frequency is 1 and is not stored separately
                    $docId += ($docDelta-1)/2;
                    $freqs[ $docId ] = 1;
                } else {
                    // Even value: frequency follows as a separate VInt
                    $docId += $docDelta/2;
                    $freqs[ $docId ] = $this->_frqFile->readVInt();
                }
            }

            $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
            foreach ($freqs as $docId => $freq) {
                $termPosition = 0;  $positions = array();

                // Positions are stored as deltas from the previous position
                for ($count = 0; $count < $freq; $count++ ) {
                    $termPosition += $this->_prxFile->readVInt();
                    $positions[] = $termPosition;
                }

                // Documents missing from the map are skipped
                if (isset($this->_docMap[$docId])) {
                    $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                }
            }
        }

        $this->_termCount--;
        if ($this->_termCount == 0) {
            // Last term has been read: release file handles, the term itself stays current
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;
        }

        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end
     */
    public function closeTermsStream()
    {
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;

        $this->_lastTerm          = null;
        $this->_lastTermInfo      = null;
        $this->_lastTermPositions = null;

        $this->_docMap            = null;
    }


    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }


    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * The value is null when positions have not been loaded (for example in
     * SM_TERMS_ONLY mode or after the stream has been closed).
     *
     * @return array
     */
    public function currentTermPositions()
    {
        return $this->_lastTermPositions;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -