segmentinfo.php.svn-base
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 972 行 · 第 1/2 页
SVN-BASE
972 行
// search for appropriate value in dictionary
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            // $mid = ($highIndex - $lowIndex)/2;
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex = $mid+1;
            } else {
                // return $this->_termDictionaryInfos[$mid]; // We got it!
                $a = $this->_termDictionaryInfos[$mid];
                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

                // Put loaded termInfo into cache
                $this->_termInfoCache[$termKey] = $termInfo;

                return $termInfo;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval = $tisFile->readInt();

        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - 20 /* header size*/, SEEK_CUR);

        $termValue = $prevTerm[1] /* text */;
        $termFieldNum = $prevTerm[0] /* field */;
        $freqPointer = $prevTermInfo[1] /* freqPointer */;
        $proxPointer = $prevTermInfo[2] /* proxPointer */;
        for ($count = $prevPosition*$indexInterval + 1;
             $count <= $termCount &&
             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
               strcmp($termValue, $term->text) < 0) );
             $count++) {
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix = $tisFile->readString();
            $termFieldNum = $tisFile->readVInt();
            $termValue = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

            $docFreq = $tisFile->readVInt();
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if( $docFreq >= $skipInterval ) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        if ($termFieldNum == $searchField && $termValue == $term->text) {
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        } else {
            $termInfo = null;
        }

        // Put loaded termInfo into cache
        $this->_termInfoCache[$termKey] = $termInfo;

        if (count($this->_termInfoCache) == 1024) {
            $this->_cleanUpTermInfoCache();
        }

        return $termInfo;
    }

    /**
     * Load normalization factors from an index file.
     *
     * Reads $this->_docCount bytes from the compound sub-file '.f<fieldNum>'
     * and stores the resulting byte string in $this->_norms[$fieldNum].
     *
     * @param integer $fieldNum
     */
    private function _loadNorm($fieldNum)
    {
        $fFile = $this->openCompoundFile('.f' . $fieldNum);
        $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
    }

    /**
     * Returns normalization factor for the specified document.
     *
     * Returns null when the field is unknown or is not indexed. The norm
     * bytes for the field are loaded on first use.
     *
     * @param integer $id
     * @param string $fieldName
     * @return float|null
     */
    public function norm($id, $fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // -1 is the "no such field" value normVector() also guards against;
        // without this check $this->_fields[-1] would be dereferenced.
        if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
            return null;
        }

        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        // Square-bracket offset: the curly-brace form ($str{$i}) is removed in PHP 8.
        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
    }

    /**
     * Returns norm vector, encoded in a byte string.
     *
     * For an unknown or non-indexed field the vector is synthesized: the
     * default similarity's encoded lengthNorm($fieldName, 0) byte repeated
     * once per document in the segment.
     *
     * @param string $fieldName
     * @return string
     */
    public function normVector($fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
            $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

            return str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                              $this->_docCount);
        }

        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }

    /**
     * Returns true if any documents have been deleted from this index segment.
*
     * @return boolean
     */
    public function hasDeletions()
    {
        return $this->_deleted !== null;
    }

    /**
     * Deletes a document from the index segment.
     * $id is an internal document id
     *
     * Marks the deletions as dirty so that writeChanges() persists them.
     * The deleted set is kept as a bitset when the 'bitset' extension is
     * loaded, otherwise as an array keyed by document id.
     *
     * @param integer $id
     */
    public function delete($id)
    {
        $this->_deletedDirty = true;

        if (extension_loaded('bitset')) {
            if ($this->_deleted === null) {
                $this->_deleted = bitset_empty($id);
            }
            bitset_incl($this->_deleted, $id);
        } else {
            if ($this->_deleted === null) {
                $this->_deleted = array();
            }

            $this->_deleted[$id] = 1;
        }
    }

    /**
     * Checks whether the document is deleted.
     *
     * @param integer $id
     * @return boolean
     */
    public function isDeleted($id)
    {
        if ($this->_deleted === null) {
            return false;
        }

        if (extension_loaded('bitset')) {
            return bitset_in($this->_deleted, $id);
        } else {
            return isset($this->_deleted[$id]);
        }
    }

    /**
     * Write changes if it's necessary.
     *
     * Does nothing unless deletions are marked dirty. Otherwise creates the
     * '<segment name>.del' file and writes the document count, the number of
     * deleted documents, and the deletion bit vector, then clears the dirty flag.
     */
    public function writeChanges()
    {
        if (!$this->_deletedDirty) {
            return;
        }

        if (extension_loaded('bitset')) {
            $delBytes = $this->_deleted;
            $bitCount = count(bitset_to_array($delBytes));
        } else {
            // Pack the array-keyed deleted set into a byte string, bit N of
            // byte K standing for document id K*8 + N.
            $byteCount = floor($this->_docCount/8)+1;
            $delBytes = str_repeat(chr(0), $byteCount);
            for ($count = 0; $count < $byteCount; $count++) {
                $byte = 0;
                for ($bit = 0; $bit < 8; $bit++) {
                    if (isset($this->_deleted[$count*8 + $bit])) {
                        $byte |= (1<<$bit);
                    }
                }
                // Square-bracket offset: the curly-brace form ($str{$i}) is removed in PHP 8.
                $delBytes[$count] = chr($byte);
            }
            $bitCount = count($this->_deleted);
        }

        $delFile = $this->_directory->createFile($this->_name . '.del');
        $delFile->writeInt($this->_docCount);
        $delFile->writeInt($bitCount);
        $delFile->writeBytes($delBytes);

        $this->_deletedDirty = false;
    }

    /**
     * Term Dictionary File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;

    /**
     * Frequencies File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_frqFile = null;

    /**
     * Offset of the .frq file in the compound file
     *
     * @var integer
     */
    private $_frqFileOffset;

    /**
     * Positions File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_prxFile = null;

    /**
     * Offset of the .prx file in the compound file
     *
     * @var integer
     */
    private $_prxFileOffset;

    /**
     * Number of terms in term stream
     *
     * @var integer
     */
    private $_termCount = 0;

    /**
     * Segment skip interval
     *
     * @var integer
     */
    private $_skipInterval;

    /**
     * Last TermInfo in a terms stream
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_lastTermInfo = null;

    /**
     * Last Term in a terms stream
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_lastTerm = null;

    /**
     * Map of the document IDs
     * Used to get new docID after removing deleted documents.
     * It's not very efficient in terms of memory usage,
     * but much faster than other methods
     *
     * @var array|null
     */
    private $_docMap = null;

    /**
     * An array of all term positions in the documents.
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
*
     * @var array
     */
    private $_lastTermPositions;

    /**
     * Reset terms stream
     *
     * $startId - id for the first document
     * $compact - remove deleted documents
     *
     * Re-opens the .tis, .frq and .prx sub-files, rebuilds the document id
     * map for non-deleted documents and positions the stream on the first term.
     *
     * Returns start document id for the next segment
     *
     * @param integer $startId
     * @param boolean $compact
     * @throws Zend_Search_Lucene_Exception
     * @return integer
     */
    public function reset($startId = 0, $compact = false)
    {
        // Drop any previously held handle before opening a fresh one
        $this->_tisFile = null;
        $this->_tisFile = $this->openCompoundFile('.tis', false);

        if ($this->_tisFile->readInt() != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $this->_termCount = $this->_tisFile->readLong();
        $this->_tisFile->readInt();                          // Index interval (value not used here)
        $this->_skipInterval = $this->_tisFile->readInt();   // Skip interval

        $this->_frqFile = null;
        $this->_frqFile = $this->openCompoundFile('.frq', false);
        $this->_frqFileOffset = $this->_frqFile->tell();

        $this->_prxFile = null;
        $this->_prxFile = $this->openCompoundFile('.prx', false);
        $this->_prxFileOffset = $this->_prxFile->tell();

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term('', -1);
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);

        // Map every surviving document to its new id; $liveDocs always equals
        // the number of entries already placed in the map.
        $this->_docMap = array();
        $liveDocs = 0;
        for ($docId = 0; $docId < $this->_docCount; $docId++) {
            if ($this->isDeleted($docId)) {
                continue;
            }

            $this->_docMap[$docId] = $startId + ($compact ? $liveDocs : $docId);
            $liveDocs++;
        }

        $this->nextTerm();

        if ($compact) {
            return $startId + $liveDocs;
        }

        return $startId + $this->_docCount;
    }

    /**
     * Scans terms dictionary and returns next term
     *
     * Also loads the positions of that term for every document present in
     * the document id map. Returns null, and releases the file handles,
     * when the stream is not open or has no terms left.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_tisFile === null || $this->_termCount == 0) {
            $this->_lastTerm = null;
            $this->_lastTermInfo = null;

            // may be necessary for "empty" segment
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            return null;
        }

        // Term text: shared prefix of the previous term plus the stored suffix
        $prefixLength = $this->_tisFile->readVInt();
        $suffix = $this->_tisFile->readString();
        $fieldNumber = $this->_tisFile->readVInt();
        $text = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $prefixLength) . $suffix;

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($text, $this->_fields[$fieldNumber]->name);

        // Pointers are stored as deltas against the previous term's info
        $docFreq = $this->_tisFile->readVInt();
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
        $skipOffset = ($docFreq >= $this->_skipInterval) ? $this->_tisFile->readVInt() : 0;

        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

        $this->_lastTermPositions = array();

        // Collect per-document frequencies from the .frq data
        $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
        $freqs = array();
        $docId = 0;
        for ($n = 0; $n < $this->_lastTermInfo->docFreq; $n++) {
            $docDelta = $this->_frqFile->readVInt();

            // An odd delta means frequency 1; otherwise the frequency follows
            $singleOccurrence = ($docDelta % 2 == 1);
            $docId += $singleOccurrence ? ($docDelta-1)/2 : $docDelta/2;
            $freqs[ $docId ] = $singleOccurrence ? 1 : $this->_frqFile->readVInt();
        }

        // Positions are read for every document (to keep the .prx stream in
        // step) but stored only for documents present in the id map.
        $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
        foreach ($freqs as $docId => $freq) {
            $position = 0;
            $docPositions = array();

            for ($n = 0; $n < $freq; $n++) {
                $position += $this->_prxFile->readVInt();
                $docPositions[] = $position;
            }

            if (!isset($this->_docMap[$docId])) {
                continue;
            }

            $this->_lastTermPositions[$this->_docMap[$docId]] = $docPositions;
        }

        $this->_termCount--;
        if ($this->_termCount == 0) {
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;
        }

        return $this->_lastTerm;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @return array
     */
    public function currentTermPositions()
    {
        return $this->_lastTermPositions;
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?