⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segmentinfo.php

📁 很棒的在线教学系统
💻 PHP
📖 第 1 页 / 共 4 页
字号:
     *
     * Values:
     *
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
     *                       document numbers are compacted (shifted if segment has deleted documents)
     *
     * @var integer
     */
    private $_termsScanMode;

    /** Scan modes */
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
                                // document numbers are compacted (shifted if segment contains deleted documents)

    /**
     * Reset terms stream
     *
     * Reopens the '.tis' file, checks its format marker, reads the dictionary
     * header and moves the stream to the first term by calling nextTerm().
     *
     * For SM_FULL_INFO and SM_MERGE_INFO the '.frq' and '.prx' files are opened
     * too and the document map is filled for every non-deleted document:
     *  - SM_FULL_INFO:  document N is mapped to $startId + N
     *  - SM_MERGE_INFO: documents are numbered consecutively from $startId
     *                   (deleted documents leave no gaps)
     *
     * Returns start document id for the next segment
     *
     * @param integer $startId  id for the first document
     * @param integer $mode     one of the self::SM_* scan modes
     * @throws Zend_Search_Lucene_Exception on unknown '.tis' format or scan mode
     * @return integer
     */
    public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
    {
        // Drop the previous handle (if any) before a new one is opened
        $this->_tisFile = null;

        $this->_tisFile = $this->openCompoundFile('.tis', false);
        $this->_tisFileOffset = $this->_tisFile->tell();

        $tiVersion = $this->_tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $this->_termCount     =
              $this->_termNum = $this->_tisFile->readLong(); // Read terms count
        $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
        $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // maxSkipLevels: the value is not used by this class, but it has to be
            // read so that the following nextTerm() starts after the header
            $this->_tisFile->readInt();
        }

        $this->_frqFile = null;
        $this->_prxFile = null;

        $this->_docMap = array();

        $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
        $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
        $this->_lastTermPositions = null;

        $this->_termsScanMode = $mode;

        switch ($mode) {
            case self::SM_TERMS_ONLY:
                // Do nothing
                break;

            case self::SM_FULL_INFO:
                // break intentionally omitted
            case self::SM_MERGE_INFO:
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_frqFileOffset = $this->_frqFile->tell();

                $this->_prxFile = $this->openCompoundFile('.prx', false);
                $this->_prxFileOffset = $this->_prxFile->tell();

                for ($count = 0; $count < $this->_docCount; $count++) {
                    if (!$this->isDeleted($count)) {
                        // count($this->_docMap) is taken before the new entry is added,
                        // so in merge mode it is the next free compacted id
                        $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
                    }
                }
                break;

            default:
                throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
        }

        $this->nextTerm();

        return $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
    }

    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * The dictionary index is binary-searched for the last entry that is not
     * greater than the prefix, the '.tis' stream is positioned on that entry and
     * then scanned forward term by term until a term >= prefix is reached.
     * If no such term exists, the stream is left in the "end of stream" state
     * (currentTerm() returns null).
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     * @throws Zend_Search_Lucene_Exception
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($prefix->field);

        if ($searchField == -1) {
            /**
             * Field is not presented in this segment
             * Go to the end of dictionary
             */
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // search for appropriate value in dictionary
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($prefix->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // We have reached term we are looking for.
                // $highIndex is used below as the position to start from, so it has
                // to point at the matched entry; otherwise the stream would be
                // positioned on a later index entry and the term would be skipped.
                $highIndex = $mid;
                break;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }

        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        if ($this->_tisFile === null) {
            // The end of terms stream is reached and terms dictionary file is closed
            // Perform mini-reset operation
            // NOTE(review): nextTerm() and closeTermsStream() set $this->_docMap to null
            // and it is not rebuilt here, so positions loaded after this mini-reset
            // are dropped by the isset($this->_docMap[...]) filter - confirm whether
            // callers are expected to call reset() instead.
            $this->_tisFile = $this->openCompoundFile('.tis', false);

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_prxFile = $this->openCompoundFile('.prx', false);
            }
        }
        $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);

        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                                 ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                     $prevTermInfo[1] /* freqPointer */,
                                                                     $prevTermInfo[2] /* proxPointer */,
                                                                     $prevTermInfo[3] /* skipOffset */);
        $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;

        if ($highIndex == 0) {
            // skip start entry
            $this->nextTerm();
        } elseif (strcmp($prefix->field, $this->_lastTerm->field) === 0  &&
                  strcmp($prefix->text, $this->_lastTerm->text) === 0) {
            // We got exact match in the dictionary index.
            // strcmp() is used instead of '==' because loose comparison treats
            // numeric strings such as '1e3' and '1000' as equal.

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_readLastTermPositions();
            }

            return;
        }

        // Search term matching specified prefix
        while ($this->_lastTerm !== null) {
            $fieldOrder = strcmp($this->_lastTerm->field, $prefix->field);

            if ($fieldOrder > 0  ||
                ($fieldOrder === 0  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0)) {
                // Current term matches or is greater than the pattern
                return;
            }

            $this->nextTerm();
        }
    }

    /**
     * Load positions of the current term into $this->_lastTermPositions
     *
     * Reads $this->_lastTermInfo->docFreq entries from the '.frq' file and the
     * corresponding position lists from the '.prx' file. Documents which are not
     * present in $this->_docMap are read (to keep the '.prx' stream in sync) but
     * not stored.
     *
     * Resulting structure: array( mappedDocId => array( pos1, pos2, ...), ...)
     */
    private function _readLastTermPositions()
    {
        $this->_lastTermPositions = array();

        $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
        $freqs = array();
        $docId = 0;
        for ($count = 0; $count < $this->_lastTermInfo->docFreq; $count++) {
            $docDelta = $this->_frqFile->readVInt();
            if ($docDelta % 2 == 1) {
                // Odd value: frequency is 1 and is not stored separately
                $docId += ($docDelta-1)/2;
                $freqs[$docId] = 1;
            } else {
                // Even value: frequency follows as the next VInt
                $docId += $docDelta/2;
                $freqs[$docId] = $this->_frqFile->readVInt();
            }
        }

        $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
        foreach ($freqs as $docId => $freq) {
            // Positions are stored as deltas from the previous position
            $termPosition = 0;
            $positions = array();

            for ($count = 0; $count < $freq; $count++) {
                $termPosition += $this->_prxFile->readVInt();
                $positions[] = $termPosition;
            }

            if (isset($this->_docMap[$docId])) {
                $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
            }
        }
    }

    /**
     * Scans terms dictionary and returns next term
     *
     * When the stream is closed or exhausted, all stream state (current term,
     * term info, positions, document map and file handles) is cleared and
     * null is returned.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_tisFile === null  ||  $this->_termCount == 0) {
            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;
            $this->_docMap            = null;

            // may be necessary for "empty" segment
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            return null;
        }

        // Term text is stored as: length of the prefix shared with the previous
        // term + the remaining suffix
        $termPrefixLength = $this->_tisFile->readVInt();
        $termSuffix       = $this->_tisFile->readString();
        $termFieldNum     = $this->_tisFile->readVInt();
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $termPrefixLength) . $termSuffix;

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($termValue, $this->_fields[$termFieldNum]->name);

        // Frequency and proximity pointers are stored as deltas from the previous term
        $docFreq     = $this->_tisFile->readVInt();
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
        if ($docFreq >= $this->_skipInterval) {
            $skipOffset = $this->_tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }

        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

        if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
            $this->_readLastTermPositions();
        }

        $this->_termCount--;
        if ($this->_termCount == 0) {
            // Last term has been read: release file handles right away
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;
        }

        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end
     */
    public function closeTermsStream()
    {
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;

        $this->_lastTerm          = null;
        $this->_lastTermInfo      = null;
        $this->_lastTermPositions = null;

        $this->_docMap            = null;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * The value is null when positions were not loaded (SM_TERMS_ONLY mode
     * or closed/exhausted stream).
     *
     * @return array|null
     */
    public function currentTermPositions()
    {
        return $this->_lastTermPositions;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -