⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segmentwriter.php

📁 很棒的在线教学系统
💻 PHP
📖 第 1 页 / 共 2 页
字号:
    /**
     * Term Dictionary index file
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tiiFile = null;

    /**
     * Frequencies file
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_frqFile = null;

    /**
     * Positions file
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_prxFile = null;

    /**
     * Number of written terms
     *
     * @var integer
     */
    private $_termCount;

    /**
     * Last saved term
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_prevTerm;

    /**
     * Last saved term info
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_prevTermInfo;

    /**
     * Last saved index term
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_prevIndexTerm;

    /**
     * Last saved index term info
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_prevIndexTermInfo;

    /**
     * Last term dictionary file position
     *
     * @var integer
     */
    private $_lastIndexPosition;

    /**
     * Create dictionary, frequency and positions files and write necessary headers.
     *
     * Creates the .tis, .tii, .frq and .prx files for this segment, registers
     * them in $this->_files and resets the per-dictionary writer state.
     */
    public function initializeDictionaryFiles()
    {
        $this->_tisFile = $this->_directory->createFile($this->_name . '.tis');
        $this->_tisFile->writeInt((int)0xFFFFFFFD);
        $this->_tisFile->writeLong(0 /* dummy data for terms count */);
        $this->_tisFile->writeInt(self::$indexInterval);
        $this->_tisFile->writeInt(self::$skipInterval);
        $this->_tisFile->writeInt(self::$maxSkipLevels);

        $this->_tiiFile = $this->_directory->createFile($this->_name . '.tii');
        $this->_tiiFile->writeInt((int)0xFFFFFFFD);
        $this->_tiiFile->writeLong(0 /* dummy data for terms count */);
        $this->_tiiFile->writeInt(self::$indexInterval);
        $this->_tiiFile->writeInt(self::$skipInterval);
        $this->_tiiFile->writeInt(self::$maxSkipLevels);

        /** Dump dictionary header (the leading, empty index entry) */
        $this->_tiiFile->writeVInt(0);                    // prefix length
        $this->_tiiFile->writeString('');                 // suffix
        $this->_tiiFile->writeInt((int)0xFFFFFFFF);       // field number
        $this->_tiiFile->writeByte((int)0x0F);
        $this->_tiiFile->writeVInt(0);                    // DocFreq
        $this->_tiiFile->writeVInt(0);                    // FreqDelta
        $this->_tiiFile->writeVInt(0);                    // ProxDelta
        // NOTE(review): 24 presumably equals the size of the .tis header written
        // above (4 + 8 + 4 + 4 + 4 bytes) - confirm against the storage class.
        $this->_tiiFile->writeVInt(24);                   // IndexDelta

        $this->_frqFile = $this->_directory->createFile($this->_name . '.frq');
        $this->_prxFile = $this->_directory->createFile($this->_name . '.prx');

        $this->_files[] = $this->_name . '.tis';
        $this->_files[] = $this->_name . '.tii';
        $this->_files[] = $this->_name . '.frq';
        $this->_files[] = $this->_name . '.prx';

        $this->_prevTerm          = null;
        $this->_prevTermInfo      = null;
        $this->_prevIndexTerm     = null;
        $this->_prevIndexTermInfo = null;
        $this->_lastIndexPosition = 24;
        $this->_termCount         = 0;
    }

    /**
     * Add term
     *
     * Term positions is an array( docId => array(pos1, pos2, pos3, ...), ... )
     *
     * Writes the document/frequency data to the .frq file, the position deltas
     * to the .prx file and a dictionary entry to the .tis file. Every
     * self::$indexInterval-th term is additionally written to the .tii file.
     *
     * @param Zend_Search_Lucene_Index_Term $termEntry
     * @param array $termDocs
     */
    public function addTerm($termEntry, $termDocs)
    {
        $freqPointer = $this->_frqFile->tell();
        $proxPointer = $this->_prxFile->tell();

        $prevDoc = 0;
        foreach ($termDocs as $docId => $termPositions) {
            // The doc delta is shifted left by one bit; the low bit set means
            // that no explicit frequency value follows it.
            $docDelta = ($docId - $prevDoc)*2;
            $prevDoc  = $docId;

            $positionCount = count($termPositions);
            if ($positionCount > 1) {
                $this->_frqFile->writeVInt($docDelta);
                $this->_frqFile->writeVInt($positionCount);
            } else {
                $this->_frqFile->writeVInt($docDelta + 1);
            }

            // Positions are stored as deltas from the previous position
            $prevPosition = 0;
            foreach ($termPositions as $position) {
                $this->_prxFile->writeVInt($position - $prevPosition);
                $prevPosition = $position;
            }
        }

        if (count($termDocs) >= self::$skipInterval) {
            /**
             * @todo Write Skip Data to a freq file.
             * It's not used now, but make index more optimal
             */
            $skipOffset = $this->_frqFile->tell() - $freqPointer;
        } else {
            $skipOffset = 0;
        }

        $term = new Zend_Search_Lucene_Index_Term($termEntry->text,
                                                  $this->_fields[$termEntry->field]->number);
        $termInfo = new Zend_Search_Lucene_Index_TermInfo(count($termDocs),
                                                          $freqPointer, $proxPointer, $skipOffset);

        $this->_dumpTermDictEntry($this->_tisFile, $this->_prevTerm, $term, $this->_prevTermInfo, $termInfo);

        if (($this->_termCount + 1) % self::$indexInterval == 0) {
            $this->_dumpTermDictEntry($this->_tiiFile, $this->_prevIndexTerm, $term, $this->_prevIndexTermInfo, $termInfo);

            // Index entries also carry the .tis position as a delta from the previous index entry
            $indexPosition = $this->_tisFile->tell();
            $this->_tiiFile->writeVInt($indexPosition - $this->_lastIndexPosition);
            $this->_lastIndexPosition = $indexPosition;
        }

        $this->_termCount++;
    }

    /**
     * Close dictionary
     *
     * Overwrites the dummy term counts that initializeDictionaryFiles() wrote
     * into the .tis and .tii headers with the actual values.
     */
    public function closeDictionaryFiles()
    {
        // Offset 4 skips the leading int written by initializeDictionaryFiles()
        $this->_tisFile->seek(4);
        $this->_tisFile->writeLong($this->_termCount);

        // addTerm() writes one .tii entry for each full group of self::$indexInterval
        // terms; "+ 1" counts the leading empty entry written by
        // initializeDictionaryFiles(). The subtraction makes the division exact, so
        // the result stays an integer (the previous ceil() based formula produced a
        // float and over-counted by one when
        // $_termCount % $indexInterval == $indexInterval - 1).
        $indexEntryCount = ($this->_termCount - $this->_termCount % self::$indexInterval)
                         / self::$indexInterval + 1;

        $this->_tiiFile->seek(4);
        $this->_tiiFile->writeLong($indexEntryCount);
    }

    /**
     * Dump Term Dictionary segment file entry.
     * Used to write entry to .tis or .tii files
     *
     * $prevTerm and $prevTermInfo are passed by reference and are updated to
     * $term and $termInfo, so the next call can write deltas against them.
     *
     * @param Zend_Search_Lucene_Storage_File $dicFile
     * @param Zend_Search_Lucene_Index_Term $prevTerm
     * @param Zend_Search_Lucene_Index_Term $term
     * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
     * @param Zend_Search_Lucene_Index_TermInfo $termInfo
     */
    protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
                                        &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
                                        &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
    {
        if (isset($prevTerm) && $prevTerm->field == $term->field) {
            // Count the leading bytes shared with the previous term
            $matchedBytes = 0;
            $maxBytes = min(strlen($prevTerm->text), strlen($term->text));
            while ($matchedBytes < $maxBytes  &&
                   $prevTerm->text[$matchedBytes] == $term->text[$matchedBytes]) {
                $matchedBytes++;
            }

            // Calculate actual matched UTF-8 pattern:
            // convert the shared byte count into a count of whole characters
            $prefixBytes = 0;
            $prefixChars = 0;
            while ($prefixBytes < $matchedBytes) {
                // Derive the character's byte length from the bits of its lead byte
                $charBytes = 1;
                if ((ord($term->text[$prefixBytes]) & 0xC0) == 0xC0) {
                    $charBytes++;
                    if (ord($term->text[$prefixBytes]) & 0x20 ) {
                        $charBytes++;
                        if (ord($term->text[$prefixBytes]) & 0x10 ) {
                            $charBytes++;
                        }
                    }
                }

                if ($prefixBytes + $charBytes > $matchedBytes) {
                    // char crosses matched bytes boundary
                    // skip char
                    break;
                }

                $prefixChars++;
                $prefixBytes += $charBytes;
            }

            // Write prefix length
            $dicFile->writeVInt($prefixChars);
            // Write suffix
            $dicFile->writeString(substr($term->text, $prefixBytes));
        } else {
            // Write prefix length
            $dicFile->writeVInt(0);
            // Write suffix
            $dicFile->writeString($term->text);
        }

        // Write field number
        $dicFile->writeVInt($term->field);
        // DocFreq (the count of documents which contain the term)
        $dicFile->writeVInt($termInfo->docFreq);

        $prevTerm = $term;

        if (!isset($prevTermInfo)) {
            // Write FreqDelta
            $dicFile->writeVInt($termInfo->freqPointer);
            // Write ProxDelta
            $dicFile->writeVInt($termInfo->proxPointer);
        } else {
            // Write FreqDelta
            $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
            // Write ProxDelta
            $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
        }

        // Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval
        if ($termInfo->skipOffset != 0) {
            $dicFile->writeVInt($termInfo->skipOffset);
        }

        $prevTermInfo = $termInfo;
    }

    /**
     * Generate compound index file
     *
     * Writes a directory of (data offset, file name) records for every file in
     * $this->_files into a new .cfs file, then appends each file's contents,
     * back-patches its offset record and deletes the source file.
     */
    protected function _generateCFS()
    {
        $cfsFile = $this->_directory->createFile($this->_name . '.cfs');
        $cfsFile->writeVInt(count($this->_files));

        // Remember where each file's offset placeholder lives so it can be patched later
        $dataOffsetPointers = array();
        foreach ($this->_files as $fileName) {
            $dataOffsetPointers[$fileName] = $cfsFile->tell();
            $cfsFile->writeLong(0); // write dummy data
            $cfsFile->writeString($fileName);
        }

        foreach ($this->_files as $fileName) {
            // Get actual data offset
            $dataOffset = $cfsFile->tell();
            // Seek to the data offset pointer
            $cfsFile->seek($dataOffsetPointers[$fileName]);
            // Write actual data offset value
            $cfsFile->writeLong($dataOffset);
            // Seek back to the end of file
            $cfsFile->seek($dataOffset);

            $dataFile = $this->_directory->getFileObject($fileName);

            // Copy the file in chunks of at most 128Kb
            $byteCount = $this->_directory->fileLength($fileName);
            while ($byteCount > 0) {
                $data = $dataFile->readBytes(min($byteCount, 131072 /*128Kb*/));
                $byteCount -= strlen($data);
                $cfsFile->writeBytes($data);
            }

            $this->_directory->deleteFile($fileName);
        }
    }

    /**
     * Close segment, write it to disk and return segment info
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     */
    abstract public function close();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -