📄 segmentinfo.php
字号:
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';

/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_LockManager */
// $CFG is read from the including scope (presumably the host application's
// configuration object - confirm against the caller). The @ operator keeps
// the check silent when $CFG or the setting is absent, in which case the
// regular lock manager is loaded.
if (@$CFG->block_search_softlock){
    require_once "Zend/Search/Lucene/SoftLockManager.php";
} else {
    require_once "Zend/Search/Lucene/LockManager.php";
}

/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     *
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
     * of performance considerations)
     * [0] -> $termValue
     * [1] -> $termFieldNum
     *
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
     *
     * @var array
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     *
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
     * of performance considerations)
     * [0] -> $docFreq
     * [1] -> $freqPointer
     * [2] -> $proxPointer
     * [3] -> $skipOffset
     * [4] -> $indexPointer
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains fields ordered by names)
     *
     * @var array
     */
    private $_fieldsDicPositions;

    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * Associative array where the key is the file name and the value is file size
     * within the compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFileSizes;

    /**
     * Delete file generation number
     *
     * -1 means 'there is no delete file'
     *  0 means pre-2.1 format delete file
     *  X specifies used delete file
     *
     * @var integer
     */
    private $_delGen;

    /**
     * Segment has single norms file
     *
     * If true then one .nrm file is used for all fields
     * Otherwise .fN files are used
     *
     * @var boolean
     */
    private $_hasSingleNormFile;

    /**
     * Use compound segment file (*.cfs) to collect all other segment files
     * (excluding .del files)
     *
     * @var boolean
     */
    private $_isCompound;

    /**
     * Storage directory the segment files are read from.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array of norm vectors; the constructor keys the entries it creates
     * by field number.
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * List of deleted documents.
     * bitset if bitset extension is loaded or array otherwise.
     *
     * @var mixed
     */
    private $_deleted = null;

    /**
     * $this->_deleted update flag
     *
     * @var boolean
     */
    private $_deletedDirty = false;

    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor
     *
     * Reads the compound file table (if the segment is compound), the field
     * infos (.fnm) and the deletions file (.del) of the segment.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string  $name
     * @param integer $docCount
     * @param integer $delGen             -1: no delete file, 0: pre-2.1 delete file, X: delete file generation
     * @param boolean $hasSingleNormFile
     * @param boolean $isCompound         null means "pre-2.1 segment, detect by probing for the .cfs file"
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $hasSingleNormFile = false, $isCompound = null)
    {
        $this->_directory         = $directory;
        $this->_name              = $name;
        $this->_docCount          = $docCount;
        $this->_hasSingleNormFile = $hasSingleNormFile;
        $this->_delGen            = $delGen;
        $this->_termDictionary    = null;

        if (!is_null($isCompound)) {
            $this->_isCompound = $isCompound;
        } else {
            // It's a pre-2.1 segment
            // detect if it uses compound file
            $this->_isCompound = true;

            try {
                // Try to open compound file
                $this->_directory->getFileObject($name . '.cfs');
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'is not readable') !== false) {
                    // Compound file is not found or is not readable
                    $this->_isCompound = false;
                } else {
                    throw $e;
                }
            }
        }

        $this->_segFiles = array();
        if ($this->_isCompound) {
            $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
            $segFilesCount = $cfsFile->readVInt();

            for ($count = 0; $count < $segFilesCount; $count++) {
                $dataOffset = $cfsFile->readLong();
                if ($count != 0) {
                    // Size of the previous entry is the distance between
                    // its offset and the offset of the current entry
                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
                }
                $fileName = $cfsFile->readString();
                $this->_segFiles[$fileName] = $dataOffset;
            }
            if ($count != 0) {
                // The last entry extends up to the end of the compound file
                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
            }
        }

        $fnmFile     = $this->openCompoundFile('.fnm');
        $fieldsCount = $fnmFile->readVInt();
        $fieldNames  = array();
        $fieldNums   = array();
        $this->_fields = array();
        for ($count = 0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2 );
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }
        // Order field numbers by field name and remember each field's position
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);

        if ($this->_delGen == -1) {
            // There is no delete file for this segment
            // Do nothing
        } else if ($this->_delGen == 0) {
            // It's a segment with pre-2.1 format delete file
            // Try to find delete file
            try {
                // '.del' files always stored in a separate file
                // Segment compound is not used
                $delFile = $this->_directory->getFileObject($this->_name . '.del');

                // The first int of the file is the size of the bit vector
                $this->_deleted = $this->_readDeletionsBitVector($delFile, $delFile->readInt());
            } catch (Zend_Search_Lucene_Exception $e) {
                // Same exception class as the .cfs probe above, so a missing
                // delete file is handled the same way as a missing compound file
                if (strpos($e->getMessage(), 'is not readable') === false) {
                    throw $e;
                }
                // There is no delete file
                // Do nothing
            }
        } else {
            // It's 2.1+ format delete file
            $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');

            $format = $delFile->readInt();

            if ($format == (int)0xFFFFFFFF) {
                /**
                 * @todo Implement support of DGaps delete file format.
                 * See Lucene file format for details - http://lucene.apache.org/java/docs/fileformats.html#Deleted%20Documents
                 */
                throw new Zend_Search_Lucene_Exception('DGaps delete file format is not supported. Optimize index to use it with Zend_Search_Lucene');
            }

            // $format is actually the size of the bit vector
            $this->_deleted = $this->_readDeletionsBitVector($delFile, $format);
        }
    }

    /**
     * Reads the remainder of a bit-vector delete file and converts it into
     * the representation stored in $this->_deleted.
     *
     * The caller must already have consumed the leading int of the file and
     * pass it as $size. This method then reads the "number of set bits" int
     * and, unless that number is zero, ceil($size / 8) bytes of bit data.
     *
     * @param Zend_Search_Lucene_Storage_File $delFile  delete file positioned right after its first int
     * @param integer $size  bit vector size, in bits
     * @return mixed  raw byte string if the bitset extension is loaded,
     *                otherwise an array (deleted document number => 1)
     */
    private function _readDeletionsBitVector($delFile, $size)
    {
        // ceil() returns a float; the byte count is handed over as an integer
        $byteCount = (int)ceil($size / 8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
            // Nothing is marked as deleted, the bit data is not read
            $delBytes = '';
        } else {
            $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
            return $delBytes;
        }

        $deleted = array();
        // Walk only over the bytes actually read: $delBytes is empty when
        // $bitCount is 0, even though $byteCount may be greater than zero
        $length = strlen($delBytes);
        for ($count = 0; $count < $length; $count++) {
            // Square-bracket offset: the curly-brace form is not accepted by PHP 8
            $byte = ord($delBytes[$count]);
            for ($bit = 0; $bit < 8; $bit++) {
                if ($byte & (1 << $bit)) {
                    $deleted[$count*8 + $bit] = 1;
                }
            }
        }

        return $deleted;
    }

    /**
     * Opens index file stored within compound index file
     *
     * For a non-compound segment the file "<segment name><extension>" is
     * opened directly. For a compound segment the .cfs file is opened and
     * positioned at the data offset recorded for the requested file.
     *
     * @param string $extension
     * @param boolean $shareHandler
     * @throws Zend_Search_Lucene_Exception
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension, $shareHandler = true)
    {
        $filename = $this->_name . $extension;

        if (!$this->_isCompound) {
            return $this->_directory->getFileObject($filename, $shareHandler);
        }

        if( !isset($this->_segFiles[$filename]) ) {
            throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
                                       . $filename . ' file.' );
        }

        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
        $file->seek($this->_segFiles[$filename]);
        return $file;
    }

    /**
     * Get compound file length
     *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -