📄 segmentinfo.php
字号:
* Result array structure: array(docId => array(pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $shift  Value added to every document id used as a key of the result
     * @return array  Empty array if no term info is found for $term
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0)
    {
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            return array();
        }

        // NOTE(review): the seek is relative (SEEK_CUR); presumably openCompoundFile()
        // leaves the handle at the start of the requested sub-file - confirm.
        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

        $freqs = array();
        $docId = 0;
        for ($count = 0; $count < $termInfo->docFreq; $count++) {
            $docDelta = $frqFile->readVInt();
            if ($docDelta % 2 == 1) {
                // Odd value: frequency is 1 and is not stored separately
                $docId += ($docDelta-1)/2;
                $freqs[$docId] = 1;
            } else {
                // Even value: the frequency follows as its own VInt
                $docId += $docDelta/2;
                $freqs[$docId] = $frqFile->readVInt();
            }
        }

        $result = array();

        $prxFile = $this->openCompoundFile('.prx');
        $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

        foreach ($freqs as $docId => $freq) {
            // Positions are stored as deltas from the previous position within a document
            $termPosition = 0;
            $positions    = array();

            for ($count = 0; $count < $freq; $count++) {
                $termPosition += $prxFile->readVInt();
                $positions[]   = $termPosition;
            }

            $result[$shift + $docId] = $positions;
        }

        return $result;
    }

    /**
     * Load normalization factors from an index file
     *
     * With a single norms file ('.nrm') the norms of every indexed field are loaded at once;
     * otherwise only the '.f<fieldNum>' file of the requested field is read.
     *
     * @param integer $fieldNum
     * @throws Zend_Search_Lucene_Exception  If the '.nrm' header is not 'NRM' followed by 0xFF
     */
    private function _loadNorm($fieldNum)
    {
        if ($this->_hasSingleNormFile) {
            $normfFile = $this->openCompoundFile('.nrm');

            $header              = $normfFile->readBytes(3);
            $headerFormatVersion = $normfFile->readByte();

            if ($header != 'NRM' || $headerFormatVersion != (int)0xFF) {
                throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
            }

            // Note: the loop variable overwrites the $fieldNum parameter
            foreach ($this->_fields as $fieldNum => $fieldInfo) {
                if ($fieldInfo->isIndexed) {
                    $this->_norms[$fieldNum] = $normfFile->readBytes($this->_docCount);
                }
            }
        } else {
            $fFile = $this->openCompoundFile('.f' . $fieldNum);
            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
        }
    }

    /**
     * Returns normalization factor for specified documents
     *
     * @param integer $id
     * @param string $fieldName
     * @return float|null  Null if the field is unknown or is not indexed
     */
    public function norm($id, $fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // -1 is treated as "no such field", same as in normVector(); checking it first
        // avoids indexing $this->_fields with a non-existing key.
        if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
            return null;
        }

        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        // Square brackets are used for the byte offset: the curly brace form
        // ($str{$i}) is not supported by PHP 8.
        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
    }

    /**
     * Returns norm vector, encoded in a byte string
     *
     * For an unknown or non-indexed field the vector is built from the default
     * similarity's encoded lengthNorm($fieldName, 0), repeated once per document.
     *
     * @param string $fieldName
     * @return string
     */
    public function normVector($fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
            $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

            return str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                              $this->_docCount);
        }

        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }

    /**
     * Returns true if any documents have been deleted from this index segment.
     *
     * @return boolean
     */
    public function hasDeletions()
    {
        return $this->_deleted !== null;
    }

    /**
     * Tells whether the segment has a single norms file.
     *
     * @return integer  1 if the segment has a single norms file, 0 otherwise
     */
    public function hasSingleNormFile()
    {
        return $this->_hasSingleNormFile ? 1 : 0;
    }

    /**
     * Tells whether the segment is stored using a compound segment file.
     *
     * @return integer  1 if the segment is compound, 0 otherwise
     */
    public function isCompound()
    {
        return $this->_isCompound ? 1 : 0;
    }

    /**
     * Deletes a document from the index segment.
     * $id is an internal document id
     *
     * The deletion is only recorded in memory and flagged as dirty here;
     * writeChanges() stores it.
     *
     * @param integer $id
     */
    public function delete($id)
    {
        $this->_deletedDirty = true;

        if (extension_loaded('bitset')) {
            if ($this->_deleted === null) {
                $this->_deleted = bitset_empty($id);
            }
            bitset_incl($this->_deleted, $id);
        } else {
            if ($this->_deleted === null) {
                $this->_deleted = array();
            }

            $this->_deleted[$id] = 1;
        }
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer $id
     * @return boolean
     */
    public function isDeleted($id)
    {
        if ($this->_deleted === null) {
            return false;
        }

        if (extension_loaded('bitset')) {
            return bitset_in($this->_deleted, $id);
        } else {
            return isset($this->_deleted[$id]);
        }
    }

    /**
     * Write changes if it's necessary.
     *
     * Does nothing unless a deletion has been recorded since the last write. Otherwise
     * a new '<segment_name>_<generation>.del' file is created (generation in base 36)
     * and filled with the document count, the number of deleted documents and the
     * deletions bit vector.
     */
    public function writeChanges()
    {
        if (!$this->_deletedDirty) {
            return;
        }

        if (extension_loaded('bitset')) {
            $delBytes = $this->_deleted;
            $bitCount = count(bitset_to_array($delBytes));
        } else {
            // Pack the deleted ids into a byte string, bit N of byte M <=> document M*8 + N
            $byteCount = floor($this->_docCount/8)+1;
            $delBytes = str_repeat(chr(0), $byteCount);
            for ($count = 0; $count < $byteCount; $count++) {
                $byte = 0;
                for ($bit = 0; $bit < 8; $bit++) {
                    if (isset($this->_deleted[$count*8 + $bit])) {
                        $byte |= (1<<$bit);
                    }
                }
                // Square brackets: the curly brace offset form is not supported by PHP 8
                $delBytes[$count] = chr($byte);
            }
            $bitCount = count($this->_deleted);
        }

        // Get new generation number
        Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

        // The segment name is escaped so it is always matched literally
        $delFilePattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

        $delFileList = array();
        foreach ($this->_directory->fileList() as $file) {
            if ($file == $this->_name . '.del') {
                // Matches <segment_name>.del file name
                $delFileList[] = 0;
            } else if (preg_match($delFilePattern, $file, $matches)) {
                // Matches <segment_name>_NNN.del file names
                $delFileList[] = (int)base_convert($matches[1], 36, 10);
            }
        }

        if (count($delFileList) == 0) {
            // There is no deletions file for current segment in the directory
            // Set deletions file generation number to 1
            $this->_delGen = 1;
        } else {
            // There are some deletions files for current segment in the directory
            // Set deletions file generation number to the highest + 1
            $this->_delGen = max($delFileList) + 1;
        }

        $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');

        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        $delFile->writeInt($this->_docCount);
        $delFile->writeInt($bitCount);
        $delFile->writeBytes($delBytes);

        $this->_deletedDirty = false;
    }

    /**
     * Term Dictionary File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;

    /**
     * Actual offset of the .tis file data
     *
     * @var integer
     */
    private $_tisFileOffset;

    /**
     * Frequencies File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_frqFile = null;

    /**
     * Actual offset of the .frq file data
     *
     * @var integer
     */
    private $_frqFileOffset;

    /**
     * Positions File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_prxFile = null;

    /**
     * Actual offset of the .prx file in the compound file
     *
     * @var integer
     */
    private $_prxFileOffset;

    /**
     * Actual number of terms in term stream
     *
     * @var integer
     */
    private $_termCount = 0;

    /**
     * Overall number of terms in term stream
     *
     * @var integer
     */
    private $_termNum = 0;

    /**
     * Segment index interval
     *
     * @var integer
     */
    private $_indexInterval;

    /**
     * Segment skip interval
     *
     * @var integer
     */
    private $_skipInterval;

    /**
     * Last TermInfo in a terms stream
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_lastTermInfo = null;

    /**
     * Last Term in a terms stream
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_lastTerm = null;

    /**
     * Map of the document IDs
     * Used to get new docID after removing deleted documents.
     * It's not very efficient from a memory usage point of view,
     * but much faster than other methods
     *
     * @var array|null
     */
    private $_docMap = null;

    /**
     * An array of all term positions in the documents.
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * Is set to null if term positions loading has to be skipped
     *
     * @var array|null
     */
    private $_lastTermPositions;

    /**
     * Terms scan mode
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -