segmentinfo.php.svn-base
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 972 行 · 第 1/2 页
SVN-BASE
972 行
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     *
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
     * of performance considerations)
     * [0] -> $termValue
     * [1] -> $termFieldNum
     *
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
     *
     * @var array
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     *
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
     * of performance considerations)
     * [0] -> $docFreq
     * [1] -> $freqPointer
     * [2] -> $proxPointer
     * [3] -> $skipOffset
     * [4] -> $indexPointer
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains fields ordered by names)
     *
     * @var array
     */
    private $_fieldsDicPositions;

    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * Associative array where the key is the file name and the value is file size (.cfs).
     *
     * @var array
     */
    private $_segFileSizes;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array fieldNum => normVector (the constructor keys the vectors it
     * pre-fills by field number).
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * List of deleted documents.
     * bitset if bitset extension is loaded or array otherwise.
     * null when the segment has no '.del' file.
     *
     * @var mixed
     */
    private $_deleted;

    /**
     * $this->_deleted update flag
     *
     * @var boolean
     */
    private $_deletedDirty = false;

    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
     * Documents count and Directory as a parameter.
     *
     * Reads three pieces of segment metadata:
     *  - the compound file ('.cfs') directory, if such a file exists;
     *  - the field infos ('.fnm');
     *  - the deleted documents bit vector ('.del'), if present.
     *
     * @param string $name
     * @param integer $docCount
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @throws Zend_Search_Exception  any failure other than a missing '.del' file is re-thrown
     */
    public function __construct($name, $docCount, $directory)
    {
        $this->_name           = $name;
        $this->_docCount       = $docCount;
        $this->_directory      = $directory;
        $this->_termDictionary = null;

        $this->_segFiles     = array();
        // Initialised explicitly so the property is always an array, even without a '.cfs' file
        $this->_segFileSizes = array();

        if ($this->_directory->fileExists($name . '.cfs')) {
            $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
            $segFilesCount = $cfsFile->readVInt();

            // Each directory entry is a data offset followed by a file name.
            // The size of an entry is the distance to the next entry's offset,
            // so it can only be computed once the following entry has been read.
            for ($count = 0; $count < $segFilesCount; $count++) {
                $dataOffset = $cfsFile->readLong();
                if ($count != 0) {
                    // $fileName still refers to the previous entry here
                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
                }
                $fileName = $cfsFile->readString();
                $this->_segFiles[$fileName] = $dataOffset;
            }
            if ($count != 0) {
                // The last entry extends to the end of the compound file
                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
            }
        }

        $fnmFile       = $this->openCompoundFile('.fnm');
        $fieldsCount   = $fnmFile->readVInt();
        $fieldNames    = array();
        $fieldNums     = array();
        $this->_fields = array();
        for ($count = 0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2);
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }
        // Order the field numbers by field name; flipping the result gives
        // field number => position in the name-ordered list.
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);

        try {
            $delFile = $this->openCompoundFile('.del');

            // The first int is divided by 8 to get the vector length in bytes;
            // ceil() returns a float, so cast it before using it as a length.
            $byteCount = (int) ceil($delFile->readInt() / 8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if (extension_loaded('bitset')) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();
                // Walk only the bytes that were actually read: $delBytes is empty
                // when $bitCount is 0, and indexing past its end raises a warning.
                $loadedBytes = strlen($delBytes);
                for ($count = 0; $count < $loadedBytes; $count++) {
                    // Square brackets: curly-brace string offsets were removed in PHP 8
                    $byte = ord($delBytes[$count]);
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1 << $bit)) {
                            $this->_deleted[$count * 8 + $bit] = 1;
                        }
                    }
                }
            }
        } catch (Zend_Search_Exception $e) {
            // A missing '.del' file only means that nothing has been deleted
            if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false) {
                $this->_deleted = null;
            } else {
                throw $e;
            }
        }
    }

    /**
     * Opens index file stored within compound index file
     *
     * @param string $extension
     * @param boolean $shareHandler
     *
@throws Zend_Search_Lucene_Exception
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension, $shareHandler = true)
    {
        $segmentFile = $this->_name . $extension;

        // A stand-alone file in the directory takes precedence over the compound file
        if ($this->_directory->fileExists($segmentFile)) {
            return $this->_directory->getFileObject($segmentFile, $shareHandler);
        }

        if (isset($this->_segFiles[$segmentFile])) {
            // Hand back the compound file positioned at the start of the requested entry
            $compoundFile = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
            $compoundFile->seek($this->_segFiles[$segmentFile]);

            return $compoundFile;
        }

        throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $segmentFile . ' file.');
    }

    /**
     * Get the length of a segment file, whether it is stored as a
     * stand-alone file or as an entry of the compound file
     *
     * @param string $extension
     * @throws Zend_Search_Lucene_Exception  if the file is neither stand-alone nor a known compound entry
     * @return integer
     */
    public function compoundFileLength($extension)
    {
        $segmentFile = $this->_name . $extension;

        // A stand-alone file in the directory takes precedence over the compound file
        if ($this->_directory->fileExists($segmentFile)) {
            return $this->_directory->fileLength($segmentFile);
        }

        if (isset($this->_segFileSizes[$segmentFile])) {
            return $this->_segFileSizes[$segmentFile];
        }

        throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $segmentFile . ' file.');
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        $fieldNum = -1;

        // First field whose name matches wins
        foreach ($this->_fields as $fieldInfo) {
            if ($fieldInfo->name == $fieldName) {
                $fieldNum = $fieldInfo->number;
                break;
            }
        }

        return $fieldNum;
    }

    /**
     * Returns field info for specified field
     *
     * @param integer $fieldNum
     * @return Zend_Search_Lucene_Index_FieldInfo
     */
    public function getField($fieldNum)
    {
        $fieldInfo = $this->_fields[$fieldNum];

        return $fieldInfo;
    }

    /**
     * Returns array of field names (each name is used both as key and as value).
     * If $indexed parameter is true, then returns only indexed fields.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFields($indexed = false)
    {
        $names = array();

        foreach ($this->_fields as $fieldInfo) {
            if ($indexed && !$fieldInfo->isIndexed) {
                // Only indexed fields were requested
                continue;
            }

            $names[$fieldInfo->name] = $fieldInfo->name;
        }

        return $names;
    }

    /**
     * Returns array of FieldInfo objects.
*
     * @return array
     */
    public function getFieldInfos()
    {
        return $this->_fields;
    }

    /**
     * Returns the total number of documents in this segment (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns number of deleted documents.
     *
     * @return integer
     */
    private function _deletedCount()
    {
        // null means that no deletions list was loaded for this segment
        if ($this->_deleted === null) {
            return 0;
        }

        // With the bitset extension the list is a bitset and has to be expanded before counting
        $deletedIds = extension_loaded('bitset')
                    ? bitset_to_array($this->_deleted)
                    : $this->_deleted;

        return count($deletedIds);
    }

    /**
     * Returns the total number of non-deleted documents in this segment.
     *
     * @return integer
     */
    public function numDocs()
    {
        if (!$this->hasDeletions()) {
            return $this->_docCount;
        }

        return $this->_docCount - $this->_deletedCount();
    }

    /**
     * Get field position in a fields dictionary
     *
     * @param integer $fieldNum
     * @return integer
     */
    private function _getFieldPosition($fieldNum)
    {
        if (isset($this->_fieldsDicPositions[$fieldNum])) {
            return $this->_fieldsDicPositions[$fieldNum];
        }

        // Treat values which are not in a translation table as a 'direct value'
        return $fieldNum;
    }

    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }

    /**
     * TermInfo cache
     *
     * Size is 1024.
* Numbers are used instead of class constants because of performance considerations
     *
     * @var array
     */
    private $_termInfoCache = array();

    /**
     * Drops entries from the front of the TermInfo cache until 768 are left.
     *
     * getTermInfo() re-appends an entry to the end of the array on every cache
     * hit, so the front of the array holds the least recently used entries.
     */
    private function _cleanUpTermInfoCache()
    {
        // Clean 256 term infos
        foreach (array_keys($this->_termInfoCache) as $staleKey) {
            unset($this->_termInfoCache[$staleKey]);

            // leave 768 last used term infos
            if (count($this->_termInfoCache) == 768) {
                return;
            }
        }
    }

    /**
     * Scans terms dictionary and returns term info
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return Zend_Search_Lucene_Index_TermInfo
     */
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
    {
        $termKey = $term->key();
        if (isset($this->_termInfoCache[$termKey])) {
            $termInfo = $this->_termInfoCache[$termKey];

            // Move termInfo to the end of cache
            unset($this->_termInfoCache[$termKey]);
            $this->_termInfoCache[$termKey] = $termInfo;

            return $termInfo;
        }

        if ($this->_termDictionary === null) {
            // Check, if index is already serialized
            if ($this->_directory->fileExists($this->_name . '.sti')) {
                // Prefetch dictionary index data
                $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
                $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));

                // Load dictionary index data
                list($this->_termDictionary, $this->_termDictionaryInfos) = unserialize($stiFileData);
            } else {
                // Prefetch dictionary index data
                $tiiFile = $this->openCompoundFile('.tii');
                $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

                // Load dictionary index data
                list($this->_termDictionary, $this->_termDictionaryInfos) =
                            Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

                $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
                $stiFile = $this->_directory->createFile($this->_name . '.sti');
                $stiFile->writeBytes($stiFileData);
            }
        }

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            return null;
        }
        $searchDicField = $this->_getFieldPosition($searchField);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?