📄 segmentinfo.php
字号:
* @param string $extension
     * @return integer
     */
    public function compoundFileLength($extension)
    {
        $filename = $this->_name . $extension;

        // Try to get common file first
        if ($this->_directory->fileExists($filename)) {
            return $this->_directory->fileLength($filename);
        }

        // Otherwise the size has to come from the per-segment file size table
        if (!isset($this->_segFileSizes[$filename])) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $filename . ' file.');
        }

        return $this->_segFileSizes[$filename];
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        foreach ($this->_fields as $field) {
            if ($field->name == $fieldName) {
                return $field->number;
            }
        }

        return -1;
    }

    /**
     * Returns field info for specified field
     *
     * No bounds check is performed: $fieldNum is used directly as a key
     * of the internal fields array.
     *
     * @param integer $fieldNum
     * @return Zend_Search_Lucene_Index_FieldInfo
     */
    public function getField($fieldNum)
    {
        return $this->_fields[$fieldNum];
    }

    /**
     * Returns array of fields.
     * if $indexed parameter is true, then returns only indexed fields.
     *
     * The result maps each field name to itself (name => name).
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFields($indexed = false)
    {
        $result = array();
        foreach ($this->_fields as $field) {
            if ((!$indexed) || $field->isIndexed) {
                $result[ $field->name ] = $field->name;
            }
        }

        return $result;
    }

    /**
     * Returns array of FieldInfo objects.
     *
     * @return array
     */
    public function getFieldInfos()
    {
        return $this->_fields;
    }

    /**
     * Returns actual deletions file generation number.
     *
     * @return integer
     */
    public function getDelGen()
    {
        return $this->_delGen;
    }

    /**
     * Returns the total number of documents in this segment (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns number of deleted documents.
     *
     * @return integer
     */
    private function _deletedCount()
    {
        if ($this->_deleted === null) {
            return 0;
        }

        // With the bitset extension loaded the deletions are converted with
        // bitset_to_array() before counting; otherwise they are counted directly.
        if (extension_loaded('bitset')) {
            return count(bitset_to_array($this->_deleted));
        } else {
            return count($this->_deleted);
        }
    }

    /**
     * Returns the total number of non-deleted documents in this segment.
     *
     * @return integer
     */
    public function numDocs()
    {
        if ($this->hasDeletions()) {
            return $this->_docCount - $this->_deletedCount();
        } else {
            return $this->_docCount;
        }
    }

    /**
     * Get field position in a fields dictionary
     *
     * @param integer $fieldNum
     * @return integer
     */
    private function _getFieldPosition($fieldNum)
    {
        // Treat values which are not in a translation table as a 'direct value'
        return isset($this->_fieldsDicPositions[$fieldNum]) ?
                           $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
    }

    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }

    /**
     * TermInfo cache
     *
     * Size is 1024.
     * Numbers are used instead of class constants because of performance considerations
     *
     * Entries are kept in least-recently-used-first order: a cache hit re-inserts
     * the entry at the end of the array. A null value records a term that was
     * looked up and not found.
     *
     * @var array
     */
    private $_termInfoCache = array();

    /**
     * Evicts the oldest cache entries until at most 768 remain.
     *
     * @return void
     */
    private function _cleanUpTermInfoCache()
    {
        // Clean 256 term infos
        foreach ($this->_termInfoCache as $key => $termInfo) {
            unset($this->_termInfoCache[$key]);

            // leave 768 last used term infos
            // ("<=" rather than "==" so the loop also terminates correctly
            // if the cache was ever larger than expected)
            if (count($this->_termInfoCache) <= 768) {
                break;
            }
        }
    }

    /**
     * Load terms dictionary index
     *
     * Uses the serialized '.sti' copy when it exists and can be unserialized;
     * otherwise parses the '.tii' data and writes a fresh '.sti' file.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _loadDictionaryIndex()
    {
        // Check, if index is already serialized
        if ($this->_directory->fileExists($this->_name . '.sti')) {
            // Load serialized dictionary index data
            $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));

            // Load dictionary index data
            // NOTE(review): unserialize() runs on raw file contents; this is only
            // safe while the index directory is not writable by untrusted parties.
            // A corrupt file yields false and falls through to the .tii rebuild.
            if (($unserializedData = @unserialize($stiFileData)) !== false) {
                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
                return;
            }
        }

        // Load data from .tii file and generate .sti file

        // Prefetch dictionary index data
        $tiiFile = $this->openCompoundFile('.tii');
        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

        // Load dictionary index data
        list($this->_termDictionary, $this->_termDictionaryInfos) =
                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
        $stiFile = $this->_directory->createFile($this->_name . '.sti');
        $stiFile->writeBytes($stiFileData);
    }

    /**
     * Scans terms dictionary and returns term info
     *
     * Lookup order: the in-memory TermInfo cache, then a binary search over the
     * loaded dictionary index, then a sequential scan of the '.tis' file starting
     * from the closest preceding index entry.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return Zend_Search_Lucene_Index_TermInfo|null  null if the term (or its field) is not found
     * @throws Zend_Search_Lucene_Exception  if the '.tis' file has an unknown format version
     */
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
    {
        $termKey = $term->key();

        // array_key_exists() instead of isset(): misses are cached as null and
        // isset() would never report them, forcing a full re-scan every time.
        if (array_key_exists($termKey, $this->_termInfoCache)) {
            $termInfo = $this->_termInfoCache[$termKey];

            // Move termInfo to the end of cache
            unset($this->_termInfoCache[$termKey]);
            $this->_termInfoCache[$termKey] = $termInfo;

            return $termInfo;
        }

        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            return null;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // search for appropriate value in dictionary
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            // Order by dictionary field position first, then by term text
            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex = $mid+1;
            } else {
                // We got it!
                $a = $this->_termDictionaryInfos[$mid];
                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

                // Put loaded termInfo into cache
                $this->_termInfoCache[$termKey] = $termInfo;

                // Enforce the size bound on this path too; otherwise the cache
                // could step past the limit and never be trimmed again.
                if (count($this->_termInfoCache) >= 1024) {
                    $this->_cleanUpTermInfoCache();
                }

                return $termInfo;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        // $highIndex now points at the last index entry that sorts before the term
        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval = $tisFile->readInt();
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // maxSkipLevels: value is not used here, but it must be read to
            // advance past the header
            $tisFile->readInt();
        }

        // Header bytes were already consumed above, so seek relative to the
        // current position by indexPointer minus the header size.
        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/,
                       SEEK_CUR);

        $termValue = $prevTerm[1] /* text */;
        $termFieldNum = $prevTerm[0] /* field */;
        $freqPointer = $prevTermInfo[1] /* freqPointer */;
        $proxPointer = $prevTermInfo[2] /* proxPointer */;

        // Read term entries sequentially while they still sort before the
        // requested term (by dictionary field position, then by text).
        for ($count = $prevPosition*$indexInterval + 1;
             $count <= $termCount &&
              ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
               ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
                strcmp($termValue, $term->text) < 0) );
             $count++) {
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix = $tisFile->readString();
            $termFieldNum = $tisFile->readVInt();

            // Term text is stored as a shared prefix with the previous term plus a suffix
            $termValue = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

            $docFreq = $tisFile->readVInt();

            // Pointers are stored as deltas from the previous entry
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if ($docFreq >= $skipInterval) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        // strcmp() rather than "==": loose comparison treats numeric-looking
        // strings as numbers (e.g. "0e1" == "0e2"), which could return the
        // info of a different term. This also matches the ordering test above.
        if ($termFieldNum == $searchField && strcmp($termValue, $term->text) === 0) {
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        } else {
            $termInfo = null;
        }

        // Put loaded termInfo into cache
        $this->_termInfoCache[$termKey] = $termInfo;

        if (count($this->_termInfoCache) >= 1024) {
            $this->_cleanUpTermInfoCache();
        }

        return $termInfo;
    }

    /**
     * Returns term freqs array.
     * Result array structure: array(docId => freq, ...)
     *
     * An empty array is returned when the term is not present in this segment.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $shift  value added to every document id used as a result key
     * @return array
     */
    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0)
    {
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            return array();
        }

        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
        $result = array();
        $docId = 0;

        for ($count = 0; $count < $termInfo->docFreq; $count++) {
            $docDelta = $frqFile->readVInt();

            // The low bit flags "frequency is exactly 1"; the remaining bits
            // are the document id delta from the previous entry.
            if ($docDelta % 2 == 1) {
                $docId += ($docDelta-1)/2;
                $result[$shift + $docId] = 1;
            } else {
                $docId += $docDelta/2;
                $result[$shift + $docId] = $frqFile->readVInt();
            }
        }

        return $result;
    }

    /**
     * Returns term positions array.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -