⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segmentmerger.php

📁 很棒的在线教学系统
💻 PHP
字号:
<?php/** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category   Zend * @package    Zend_Search_Lucene * @subpackage Index * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) * @license    http://framework.zend.com/license/new-bsd     New BSD License *//** Zend_Search_Lucene_Exception */require_once 'Zend/Search/Lucene/Exception.php';/** Zend_Search_Lucene_Index_SegmentInfo */require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';/** * @category   Zend * @package    Zend_Search_Lucene * @subpackage Index * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) * @license    http://framework.zend.com/license/new-bsd     New BSD License */class Zend_Search_Lucene_Index_SegmentMerger{    /**     * Target segment writer     *     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter     */    private $_writer;    /**     * Number of docs in a new segment     *     * @var integer     */    private $_docCount;    /**     * A set of segments to be merged     *     * @var array Zend_Search_Lucene_Index_SegmentInfo     */    private $_segmentInfos = array();    /**     * Flag to signal, that merge is already done     *     * @var boolean     */    private $_mergeDone = false;    /**     * Field map     * [<segment_name>][<field_number>] => <target_field_number>     *     * @var array     */    private $_fieldsMap = array();    /**     * Object constructor.     *     * Creates new segment merger with $directory as target to merge segments into     * and $name as a name of new segment     *     * @param Zend_Search_Lucene_Storage_Directory $directory     * @param string $name     */    public function __construct($directory, $name)    {        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);    }    /**     * Add segmnet to a collection of segments to be merged     *     * @param Zend_Search_Lucene_Index_SegmentInfo $segment     */    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)    {        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;    }    /**     * Do merge.     *     * Returns number of documents in newly created segment     *     * @return Zend_Search_Lucene_Index_SegmentInfo     * @throws Zend_Search_Lucene_Exception     */    public function merge()    {        if ($this->_mergeDone) {            throw new Zend_Search_Lucene_Exception('Merge is already done.');        }        if (count($this->_segmentInfos) < 1) {            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('                                                 . count($this->_segmentInfos)                                                 . ').');        }        $this->_mergeFields();        $this->_mergeNorms();        $this->_mergeStoredFields();        $this->_mergeTerms();        $this->_mergeDone = true;        return $this->_writer->close();    }    /**     * Merge fields information     */    private function _mergeFields()    {        foreach ($this->_segmentInfos as $segName => $segmentInfo) {            foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {                $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);            }        }    }    /**     * Merge field's normalization factors     */    private function _mergeNorms()    {        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {            if ($fieldInfo->isIndexed) {                foreach ($this->_segmentInfos as $segName => $segmentInfo) {                    if ($segmentInfo->hasDeletions()) {                        $srcNorm = $segmentInfo->normVector($fieldInfo->name);                        $norm    = '';                        $docs    = $segmentInfo->count();                        for ($count = 0; $count < $docs; $count++) {                            if (!$segmentInfo->isDeleted($count)) {                                $norm .= $srcNorm[$count];                            }                        }                        $this->_writer->addNorm($fieldInfo->name, $norm);                    } else {                        $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));                    }                }            }        }    }    /**     * Merge fields information     */    private function _mergeStoredFields()    {        $this->_docCount = 0;        foreach ($this->_segmentInfos as $segName => $segmentInfo) {            $fdtFile = $segmentInfo->openCompoundFile('.fdt');            for ($count = 0; $count < $segmentInfo->count(); $count++) {                $fieldCount = $fdtFile->readVInt();                $storedFields = array();                for ($count2 = 0; $count2 < $fieldCount; $count2++) {                    $fieldNum = $fdtFile->readVInt();                    $bits = $fdtFile->readByte();                    $fieldInfo = $segmentInfo->getField($fieldNum);                    if (!($bits & 2)) { // Text data                        $storedFields[] =                                 new Zend_Search_Lucene_Field($fieldInfo->name,                                                              $fdtFile->readString(),                                                              'UTF-8',                                                              true,                                                              $fieldInfo->isIndexed,                                                              $bits & 1 );                    } else {            // Binary data                        $storedFields[] =                                 new Zend_Search_Lucene_Field($fieldInfo->name,                                                              $fdtFile->readBinary(),                                                              '',                                                              true,                                                              $fieldInfo->isIndexed,                                                              $bits & 1,                                                              true);                    }                }                if (!$segmentInfo->isDeleted($count)) {                    $this->_docCount++;                    $this->_writer->addStoredFields($storedFields);                }            }        }    }    /**     * Merge fields information     */    private function _mergeTerms()    {        $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();        $segmentStartId = 0;        foreach ($this->_segmentInfos as $segName => $segmentInfo) {            $segmentStartId = $segmentInfo->reset($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);            // Skip "empty" segments            if ($segmentInfo->currentTerm() !== null) {                $segmentInfoQueue->put($segmentInfo);            }        }        $this->_writer->initializeDictionaryFiles();        $termDocs = array();        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {            // Merge positions array            $termDocs += $segmentInfo->currentTermPositions();            if ($segmentInfoQueue->top() === null ||                $segmentInfoQueue->top()->currentTerm()->key() !=                            $segmentInfo->currentTerm()->key()) {                // We got new term                ksort($termDocs, SORT_NUMERIC);                // Add term if it's contained in any document                if (count($termDocs) > 0) {                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);                }                $termDocs = array();            }            $segmentInfo->nextTerm();            // check, if segment dictionary is finished            if ($segmentInfo->currentTerm() !== null) {                // Put segment back into the priority queue                $segmentInfoQueue->put($segmentInfo);            }        }        $this->_writer->closeDictionaryFiles();    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -