segmentinfo.php.svn-base

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 972 行 · 第 1/2 页

SVN-BASE
972
字号
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * Reader for a single index segment: its field infos, its list of deleted
 * documents and its term dictionary.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     *
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
     * of performance considerations)
     * [0] -> $termValue
     * [1] -> $termFieldNum
     *
     * The corresponding Zend_Search_Lucene_Index_TermInfo data is stored in $_termDictionaryInfos.
     * Stays null until the dictionary index is loaded on first use.
     *
     * @var array
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     *
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
     * of performance considerations)
     * [0] -> $docFreq
     * [1] -> $freqPointer
     * [2] -> $proxPointer
     * [3] -> $skipOffset
     * [4] -> $indexPointer
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment,
     * indexed by field number.
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains fields ordered by names)
     *
     * Maps field number => position of that field when fields are sorted by name.
     *
     * @var array
     */
    private $_fieldsDicPositions;

    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * Associative array where the key is the file name and the value is the size of
     * that file inside the compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFileSizes;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array fieldName => normVector
     * normVector is a binary string.
* Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * List of deleted documents.
     * bitset if bitset extension is loaded or array otherwise.
     * null when the segment has no deletions file.
     *
     * @var mixed
     */
    private $_deleted;

    /**
     * $this->_deleted update flag
     *
     * @var boolean
     */
    private $_deletedDirty = false;

    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
     * Documents count and Directory as a parameter.
     *
     * Loads, in this order:
     *  - the compound file (.cfs) table of contents, if the file exists;
     *  - the field infos (.fnm);
     *  - the deleted documents list (.del), if present.
     *
     * @param string $name
     * @param integer $docCount
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @throws Zend_Search_Lucene_Exception if the .fnm file can not be found
     */
    public function __construct($name, $docCount, $directory)
    {
        $this->_name = $name;
        $this->_docCount = $docCount;
        $this->_directory = $directory;
        $this->_termDictionary = null;

        $this->_segFiles = array();
        if ($this->_directory->fileExists($name . '.cfs')) {
            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
            $segFilesCount = $cfsFile->readVInt();

            for ($count = 0; $count < $segFilesCount; $count++) {
                $dataOffset = $cfsFile->readLong();
                if ($count != 0) {
                    // Size of the previous entry is the distance between its offset
                    // and the offset of the entry which is being read now.
                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
                }
                $fileName = $cfsFile->readString();
                $this->_segFiles[$fileName] = $dataOffset;
            }
            if ($count != 0) {
                // The last entry extends up to the end of the compound file.
                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
            }
        }

        $fnmFile = $this->openCompoundFile('.fnm');
        $fieldsCount = $fnmFile->readVInt();
        $fieldNames = array();
        $fieldNums  = array();
        $this->_fields = array();
        for ($count=0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            // NOTE(review): bit 0x01 is presumably the "is indexed" flag and bit 0x02 the
            // "store term vector" flag - confirm against Zend_Search_Lucene_Index_FieldInfo.
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2 );
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }
        // Sort field numbers by field name, then flip so that
        // field number => position of the field in name order.
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);

        try {
            $delFile = $this->openCompoundFile('.del');

            // The first int divided by 8 (rounded up) is used as the byte length of the
            // bit vector. ceil() returns a float, so cast it before using it as a length.
            $byteCount = $delFile->readInt();
            $byteCount = (int) ceil($byteCount/8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if (extension_loaded('bitset')) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();
                // Iterate over the bytes which were actually read: $delBytes is empty
                // when $bitCount is 0 even though $byteCount may be greater than zero.
                $delBytesLength = strlen($delBytes);
                for ($count = 0; $count < $delBytesLength; $count++) {
                    // Square brackets: curly-brace string offsets are not valid in PHP 8.
                    $byte = ord($delBytes[$count]);
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1<<$bit)) {
                            $this->_deleted[$count*8 + $bit] = 1;
                        }
                    }
                }
            }
        } catch(Zend_Search_Exception $e) {
            // A missing .del file only means that there are no deletions.
            // NOTE(review): relies on Zend_Search_Lucene_Exception (thrown by openCompoundFile())
            // being a subclass of Zend_Search_Exception - confirm.
            if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
                $this->_deleted = null;
            } else {
                throw $e;
            }
        }
    }

    /**
     * Opens index file stored within compound index file
     *
     * A stand-alone file "<segment name><extension>" takes precedence. Otherwise the
     * compound file (.cfs) is opened and positioned at the start of the requested file.
     *
     * @param string $extension
     * @param boolean $shareHandler
     * @throws Zend_Search_Lucene_Exception if neither a stand-alone file nor a compound file entry exists
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension, $shareHandler = true)
    {
        $filename = $this->_name . $extension;

        // Try to open common file first
        if ($this->_directory->fileExists($filename)) {
            return $this->_directory->getFileObject($filename, $shareHandler);
        }

        if( !isset($this->_segFiles[$filename]) ) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                       . $filename . ' file.' );
        }

        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
        $file->seek($this->_segFiles[$filename]);
        return $file;
    }

    /**
     * Get compound file length
     *
     * Returns the length of the stand-alone file if it exists, otherwise the size
     * recorded for the corresponding compound file entry.
     *
     * @param string $extension
     * @throws Zend_Search_Lucene_Exception if neither a stand-alone file nor a compound file entry exists
     * @return integer
     */
    public function compoundFileLength($extension)
    {
        $filename = $this->_name . $extension;

        // Try to get common file first
        if ($this->_directory->fileExists($filename)) {
            return $this->_directory->fileLength($filename);
        }

        if( !isset($this->_segFileSizes[$filename]) ) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                       . $filename . ' file.' );
        }

        return $this->_segFileSizes[$filename];
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        foreach( $this->_fields as $field ) {
            if( $field->name == $fieldName ) {
                return $field->number;
            }
        }

        return -1;
    }

    /**
     * Returns field info for specified field
     *
     * @param integer $fieldNum
     * @return Zend_Search_Lucene_Index_FieldInfo
     */
    public function getField($fieldNum)
    {
        return $this->_fields[$fieldNum];
    }

    /**
     * Returns array of fields.
     * if $indexed parameter is true, then returns only indexed fields.
     *
     * Both keys and values of the result are field names.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFields($indexed = false)
    {
        $result = array();
        foreach( $this->_fields as $field ) {
            if( (!$indexed) || $field->isIndexed ) {
                $result[ $field->name ] = $field->name;
            }
        }
        return $result;
    }

    /**
     * Returns array of FieldInfo objects.
     *
     * @return array
     */
    public function getFieldInfos()
    {
        return $this->_fields;
    }

    /**
     * Returns the total number of documents in this segment (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns number of deleted documents.
     *
     * @return integer
     */
    private function _deletedCount()
    {
        if ($this->_deleted === null) {
            return 0;
        }

        if (extension_loaded('bitset')) {
            return count(bitset_to_array($this->_deleted));
        } else {
            return count($this->_deleted);
        }
    }

    /**
     * Returns the total number of non-deleted documents in this segment.
*
     * @return integer
     */
    public function numDocs()
    {
        if ($this->hasDeletions()) {
            return $this->_docCount - $this->_deletedCount();
        } else {
            return $this->_docCount;
        }
    }

    /**
     * Get field position in a fields dictionary
     *
     * @param integer $fieldNum
     * @return integer
     */
    private function _getFieldPosition($fieldNum) {
        // Treat values which are not in a translation table as a 'direct value'
        return isset($this->_fieldsDicPositions[$fieldNum]) ?
                           $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
    }

    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }

    /**
     * TermInfo cache
     *
     * Size is 1024.
     * Numbers are used instead of class constants because of performance considerations
     *
     * Keyed by the term key; the most recently used entries are kept at the end of the array.
     *
     * @var array
     */
    private $_termInfoCache = array();

    /**
     * Drops entries from the front of the TermInfo cache (the least recently used ones)
     * until 768 entries are left.
     *
     * Expects the cache to hold more than 768 entries when called; with 768 or fewer
     * entries the stop condition is never met and the whole cache is emptied.
     */
    private function _cleanUpTermInfoCache()
    {
        // Clean 256 term infos
        foreach ($this->_termInfoCache as $key => $termInfo) {
            unset($this->_termInfoCache[$key]);

            // leave 768 last used term infos
            if (count($this->_termInfoCache) == 768) {
                break;
            }
        }
    }

    /**
     * Scans terms dictionary and returns term info
     *
     * Cached results are returned directly. On the first uncached lookup the term
     * dictionary index is loaded, either from the serialized '.sti' file or from
     * the '.tii' file (in which case the '.sti' file is created).
     * Returns null if the segment has no field with the term's field name.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return Zend_Search_Lucene_Index_TermInfo
     */
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
    {
        $termKey = $term->key();
        if (isset($this->_termInfoCache[$termKey])) {
            $termInfo = $this->_termInfoCache[$termKey];

            // Move termInfo to the end of cache
            unset($this->_termInfoCache[$termKey]);
            $this->_termInfoCache[$termKey] = $termInfo;

            return $termInfo;
        }

        if ($this->_termDictionary === null) {
            // Check, if index is already serialized
            if ($this->_directory->fileExists($this->_name . '.sti')) {
                // Prefetch dictionary index data
                $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
                $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));

                // Load dictionary index data
                // NOTE(review): unserialize() trusts the contents of the '.sti' file - the index
                // directory must not be writable by untrusted parties.
                list($this->_termDictionary, $this->_termDictionaryInfos) = unserialize($stiFileData);
            } else {
                // Prefetch dictionary index data
                $tiiFile = $this->openCompoundFile('.tii');
                $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

                // Load dictionary index data
                list($this->_termDictionary, $this->_termDictionaryInfos) =
                            Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

                // Store the parsed dictionary index so that it can be loaded with unserialize() next time
                $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
                $stiFile = $this->_directory->createFile($this->_name . '.sti');
                $stiFile->writeBytes($stiFileData);
            }
        }

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            return null;
        }
        // Translate the field number into the field's position in the term dictionary
        $searchDicField = $this->_getFieldPosition($searchField);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?