segmentinfo.php.svn-base

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 972 行 · 第 1/2 页

SVN-BASE
972
字号
        // search for appropriate value in dictionary
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            // $mid = ($highIndex - $lowIndex)/2;
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            // Order by field position first, then by term text
            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // return $this->_termDictionaryInfos[$mid]; // We got it!
                $a = $this->_termDictionaryInfos[$mid];
                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

                // Put loaded termInfo into cache
                $this->_termInfoCache[$termKey] = $termInfo;

                return $termInfo;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        // $highIndex now addresses the closest dictionary entry that sorts
        // before the searched term; the sequential scan starts from there.
        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount     = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval  = $tisFile->readInt();

        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - 20 /* header size*/, SEEK_CUR);

        $termValue    = $prevTerm[1] /* text */;
        $termFieldNum = $prevTerm[0] /* field */;
        $freqPointer = $prevTermInfo[1] /* freqPointer */;
        $proxPointer = $prevTermInfo[2] /* proxPointer */;

        // Read term records one by one while the current term still sorts
        // before the searched one (field position first, then text).
        for ($count = $prevPosition*$indexInterval + 1;
             $count <= $termCount &&
             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
               strcmp($termValue, $term->text) < 0) );
             $count++) {
            // Each record stores only the suffix that differs from the previous term
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix       = $tisFile->readString();
            $termFieldNum     = $tisFile->readVInt();
            $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

            // Pointers are accumulated from the values read for each record
            $docFreq      = $tisFile->readVInt();
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if( $docFreq >= $skipInterval ) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        // NOTE(review): the loose == below compares numeric-looking strings
        // numerically (e.g. '10' == '10.0'), unlike the strcmp() ordering used
        // above - confirm whether a strcmp()-based equality is intended here.
        if ($termFieldNum == $searchField && $termValue == $term->text) {
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        } else {
            $termInfo = null;
        }

        // Put loaded termInfo into cache
        $this->_termInfoCache[$termKey] = $termInfo;

        if (count($this->_termInfoCache) == 1024) {
            $this->_cleanUpTermInfoCache();
        }

        return $termInfo;
    }

    /**
     * Load normalization factors from an index file
     *
     * Reads _docCount bytes from the segment's '.f<fieldNum>' file and keeps
     * them in $this->_norms[$fieldNum].
     *
     * @param integer $fieldNum
     */
    private function _loadNorm($fieldNum)
    {
        $fFile = $this->openCompoundFile('.f' . $fieldNum);
        $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
    }

    /**
     * Returns normalization factor for specified documents
     *
     * Returns null when the field is unknown to this segment or is not indexed.
     *
     * @param integer $id
     * @param string $fieldName
     * @return float|null
     */
    public function norm($id, $fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // Guard against a field number with no entry in $_fields before
        // dereferencing it; the result (null) is the same as for a
        // non-indexed field.
        if (!isset($this->_fields[$fieldNum])  ||  !($this->_fields[$fieldNum]->isIndexed)) {
            return null;
        }

        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        // Square-bracket string offset: the curly-brace form is deprecated
        // since PHP 7.4 and is a parse error in PHP 8.
        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
    }

    /**
     * Returns norm vector, encoded in a byte string
     *
     * For an unknown or non-indexed field the vector is filled with the
     * default similarity's encoded norm for a zero-length field, repeated
     * once per document.
     *
     * @param string $fieldName
     * @return string
     */
    public function normVector($fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
            $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

            return str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                              $this->_docCount);
        }

        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }

    /**
     * Returns true if any documents have been deleted from this index segment.
     *
     * @return boolean
     */
    public function hasDeletions()
    {
        return $this->_deleted !== null;
    }

    /**
     * Deletes a document from the index segment.
     * $id is an internal document id
     *
     * The deletion is only recorded in memory and marked dirty;
     * writeChanges() stores it.
     *
     * @param integer $id
     */
    public function delete($id)
    {
        $this->_deletedDirty = true;

        if (extension_loaded('bitset')) {
            if ($this->_deleted === null) {
                $this->_deleted = bitset_empty($id);
            }
            bitset_incl($this->_deleted, $id);
        } else {
            // Without the bitset extension deleted ids are kept as array keys
            if ($this->_deleted === null) {
                $this->_deleted = array();
            }

            $this->_deleted[$id] = 1;
        }
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer $id
     * @return boolean
     */
    public function isDeleted($id)
    {
        if ($this->_deleted === null) {
            return false;
        }

        if (extension_loaded('bitset')) {
            return bitset_in($this->_deleted, $id);
        } else {
            return isset($this->_deleted[$id]);
        }
    }

    /**
     * Write changes if it's necessary.
     *
     * Does nothing unless delete() has been called since the last write.
     * Otherwise creates the segment's '.del' file containing the document
     * count, the number of deleted documents and the deletion bit vector.
     */
    public function writeChanges()
    {
        if (!$this->_deletedDirty) {
            return;
        }

        if (extension_loaded('bitset')) {
            $delBytes = $this->_deleted;
            $bitCount = count(bitset_to_array($delBytes));
        } else {
            // Pack the deleted ids into a bit vector, lowest bit first
            $byteCount = (int) floor($this->_docCount/8) + 1;
            $delBytes = '';
            for ($count = 0; $count < $byteCount; $count++) {
                $byte = 0;
                for ($bit = 0; $bit < 8; $bit++) {
                    if (isset($this->_deleted[$count*8 + $bit])) {
                        $byte |= (1<<$bit);
                    }
                }
                // Appending avoids the curly-brace string offset syntax,
                // which is a parse error in PHP 8.
                $delBytes .= chr($byte);
            }
            $bitCount = count($this->_deleted);
        }

        $delFile = $this->_directory->createFile($this->_name . '.del');
        $delFile->writeInt($this->_docCount);
        $delFile->writeInt($bitCount);
        $delFile->writeBytes($delBytes);

        $this->_deletedDirty = false;
    }

    /**
     * Term Dictionary File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;

    /**
     * Frequencies File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_frqFile = null;

    /**
     * Offset of the .frq file in the compound file
     *
     * @var integer
     */
    private $_frqFileOffset;

    /**
     * Positions File object for stream like terms reading
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_prxFile = null;

    /**
     * Offset of the .prx file in the compound file
     *
     * @var integer
     */
    private $_prxFileOffset;

    /**
     * Number of terms in term stream
     *
     * @var integer
     */
    private $_termCount = 0;

    /**
     * Segment skip interval
     *
     * @var integer
     */
    private $_skipInterval;

    /**
     * Last TermInfo in a terms stream
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_lastTermInfo = null;

    /**
     * Last Term in a terms stream
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_lastTerm = null;

    /**
     * Map of the document IDs
     * Used to get new docID after removing deleted documents.
     * It is not very efficient in terms of memory usage,
     * but it is much faster than other methods
     *
     * @var array|null
     */
    private $_docMap = null;

    /**
     * An array of all term positions in the documents.
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @var array
     */
    private $_lastTermPositions;

    /**
     * Reset terms stream
     *
     * $startId - id for the first document
     * $compact - remove deleted documents
     *
     * Returns start document id for the next segment
     *
     * @param integer $startId
     * @param boolean $compact
     * @throws Zend_Search_Lucene_Exception
     * @return integer
     */
    public function reset($startId = 0, $compact = false)
    {
        // Drop any previously opened handle before opening a new one
        $this->_tisFile = null;
        $this->_tisFile = $this->openCompoundFile('.tis', false);
        $tiVersion = $this->_tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $this->_termCount    = $this->_tisFile->readLong();
                               $this->_tisFile->readInt();  // Read Index interval
        $this->_skipInterval = $this->_tisFile->readInt();  // Read skip interval

        $this->_frqFile = null;
        $this->_frqFile = $this->openCompoundFile('.frq', false);
        $this->_frqFileOffset = $this->_frqFile->tell();

        $this->_prxFile = null;
        $this->_prxFile = $this->openCompoundFile('.prx', false);
        $this->_prxFileOffset = $this->_prxFile->tell();

        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term('', -1);
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);

        // Map every live document to its new id. When compacting, ids are
        // assigned consecutively; otherwise the original offset is kept.
        $this->_docMap = array();
        $liveDocs = 0;
        for ($count = 0; $count < $this->_docCount; $count++) {
            if (!$this->isDeleted($count)) {
                $this->_docMap[$count] = $startId + ($compact ? $liveDocs : $count);
                $liveDocs++;
            }
        }

        // Position the stream on the first term
        $this->nextTerm();

        return $startId + ($compact ? $liveDocs : $this->_docCount);
    }

    /**
     * Scans terms dictionary and returns next term
     *
     * Also loads the positions of the new term for every document present in
     * the document map (see currentTermPositions()). Returns null once the
     * stream is exhausted or was never opened.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_tisFile === null  ||  $this->_termCount == 0) {
            $this->_lastTerm     = null;
            $this->_lastTermInfo = null;

            // may be necessary for "empty" segment
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            return null;
        }

        // Term text is stored as a shared-prefix length plus a suffix
        $termPrefixLength = $this->_tisFile->readVInt();
        $termSuffix       = $this->_tisFile->readString();
        $termFieldNum     = $this->_tisFile->readVInt();
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $termPrefixLength) . $termSuffix;

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($termValue, $this->_fields[$termFieldNum]->name);

        // Pointers are stored relative to the previous term's pointers
        $docFreq     = $this->_tisFile->readVInt();
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
        if ($docFreq >= $this->_skipInterval) {
            $skipOffset = $this->_tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }

        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

        $this->_lastTermPositions = array();

        // Read per-document frequencies. The low bit of each value flags a
        // frequency of exactly 1; the remaining bits are the doc id delta.
        $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
        $freqs = array();
        $docId = 0;
        for ($count = 0; $count < $this->_lastTermInfo->docFreq; $count++) {
            $docDelta = $this->_frqFile->readVInt();
            // Integer shift keeps $docId an int (same value as the halving it replaces)
            $docId += $docDelta >> 1;
            if (($docDelta & 1) == 1) {
                $freqs[ $docId ] = 1;
            } else {
                $freqs[ $docId ] = $this->_frqFile->readVInt();
            }
        }

        // Read positions; they must be consumed for every document to keep
        // the file pointer in sync, but are stored only for mapped documents.
        $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
        foreach ($freqs as $docId => $freq) {
            $termPosition = 0;
            $positions = array();

            for ($count = 0; $count < $freq; $count++ ) {
                $termPosition += $this->_prxFile->readVInt();
                $positions[] = $termPosition;
            }

            if (isset($this->_docMap[$docId])) {
                $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
            }
        }

        // Release the files as soon as the last term has been read
        $this->_termCount--;
        if ($this->_termCount == 0) {
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;
        }

        return $this->_lastTerm;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @return array
     */
    public function currentTermPositions()
    {
        return $this->_lastTermPositions;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?