lucene.php

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 1,038 行 · 第 1/3 页

PHP
1,038
字号
        return $doc;
    }


    /**
     * Tells whether at least one segment of the index knows the given term.
     *
     * Is used for query optimization.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return boolean  true as soon as any segment yields a TermInfo for $term
     */
    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
    {
        $found = false;

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                // One hit is enough; the remaining segments need not be checked.
                $found = true;
                break;
            }
        }

        return $found;
    }

    /**
     * Collects the ids of all documents which contain the term.
     *
     * The returned ids are index-wide: ids decoded from a segment are shifted
     * by the total document count of the segments that precede it.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term)
    {
        $docIds    = array();
        $baseDocId = 0;

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                $frq = $segment->openCompoundFile('.frq');
                $frq->seek($info->freqPointer, SEEK_CUR);

                // Document ids are stored as running deltas, one entry per document.
                $localId = 0;
                for ($remaining = $info->docFreq; $remaining > 0; $remaining--) {
                    $delta = $frq->readVInt();

                    if ($delta % 2 == 1) {
                        // Odd value: nothing else follows for this document.
                        $localId += ($delta - 1) / 2;
                    } else {
                        $localId += $delta / 2;
                        // Even value: a frequency follows; consume and ignore it.
                        $frq->readVInt();
                    }

                    $docIds[] = $baseDocId + $localId;
                }
            }

            // Advance the offset whether or not the segment contained the term.
            $baseDocId += $segment->count();
        }

        return $docIds;
    }


    /**
     * Collects, per document, the positions at which the term occurs.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * Document ids used as keys are index-wide (segment-local id plus the
     * document count of all preceding segments).
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term)
    {
        $positionsByDoc = array();
        $baseDocId      = 0;

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                // Pass 1: decode the delta-coded document ids from the .frq data
                // and remember how many occurrences each document has.
                $frq = $segment->openCompoundFile('.frq');
                $frq->seek($info->freqPointer, SEEK_CUR);

                $freqByDoc = array();
                $localId   = 0;
                for ($remaining = $info->docFreq; $remaining > 0; $remaining--) {
                    $delta = $frq->readVInt();

                    if ($delta % 2 == 1) {
                        // Odd value: exactly one occurrence, no frequency stored.
                        $localId += ($delta - 1) / 2;
                        $freqByDoc[$localId] = 1;
                    } else {
                        // Even value: the occurrence count follows as its own VInt.
                        $localId += $delta / 2;
                        $freqByDoc[$localId] = $frq->readVInt();
                    }
                }

                // Pass 2: read that many delta-coded positions per document
                // from the .prx data, in the same document order.
                $prx = $segment->openCompoundFile('.prx');
                $prx->seek($info->proxPointer, SEEK_CUR);

                foreach ($freqByDoc as $localId => $occurrences) {
                    $position     = 0;
                    $docPositions = array();

                    for ($i = 0; $i < $occurrences; $i++) {
                        $position      += $prx->readVInt();
                        $docPositions[] = $position;
                    }

                    $positionsByDoc[$baseDocId + $localId] = $docPositions;
                }
            }

            // Advance the offset whether or not the segment contained the term.
            $baseDocId += $segment->count();
        }

        return $positionsByDoc;
    }


    /**
     * Returns the number of documents in this index containing the $term.
     *
     * The per-segment document frequencies are summed; segments that do not
     * yield a TermInfo for the term contribute nothing.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $result = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);

            // Same guard as hasTerm(), termDocs() and termPositions(): only a
            // real TermInfo is dereferenced, so any other "not found" value
            // (not just null) is skipped instead of causing a property access
            // on a non-object.
            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                $result += $termInfo->docFreq;
            }
        }

        return $result;
    }


    /**
     * Retrieve the similarity used by the index reader.
     *
     * Always hands back the default similarity object.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public function getSimilarity()
    {
        $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

        return $similarity;
    }


    /**
     * Returns a normalization factor for "field, document" pair.
     *
     * @param integer $id         index-wide document id
     * @param string $fieldName
     * @return float|null  the segment's norm value; 0 if the document is marked
     *                     as deleted; null if $id does not address a document
     *                     in any known segment
     */
    public function norm( $id, $fieldName )
    {
        if ($id < 0 || $id >= $this->_docCount) {
            return null;
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $segmentSize = $segInfo->count();

            if ($id < $segmentStartId + $segmentSize) {
                // This segment owns the document; translate to its local id.
                $localId = $id - $segmentStartId;

                if ($segInfo->isDeleted($localId)) {
                    return 0;
                }

                return $segInfo->norm($localId, $fieldName);
            }

            $segmentStartId += $segmentSize;
        }

        // No segment owns this id. This can happen when _docCount is ahead of
        // the segment list (addDocument() increments it right away), or when
        // there are no segments at all. Previously the last segment was queried
        // with an out-of-range local id; treat it as out of range instead.
        return null;
    }

    /**
     * Reports whether any segment of this index has deleted documents.
     *
     * @return boolean  true if at least one segment reports deletions
     */
    public function hasDeletions()
    {
        $anyDeleted = false;

        foreach ($this->_segmentInfos as $segment) {
            if ($segment->hasDeletions()) {
                // A single segment with deletions settles the answer.
                $anyDeleted = true;
                break;
            }
        }

        return $anyDeleted;
    }


    /**
     * Deletes a document from the index.
     * $id is an internal document id
     *
     * A query hit may be passed instead of a plain id; its id property is used.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @throws Zend_Search_Lucene_Exception  if $id is negative, not below the
     *         document count, or not covered by any known segment
     */
    public function delete($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentSize = $segmentInfo->count();

            if ($id < $segmentStartId + $segmentSize) {
                // This segment owns the document; delete by its local id.
                $segmentInfo->delete($id - $segmentStartId);
                $this->_hasChanges = true;

                return;
            }

            $segmentStartId += $segmentSize;
        }

        // No segment owns this id. This can happen when _docCount is ahead of
        // the segment list (addDocument() increments it right away), or when
        // there are no segments at all. Previously the last segment received a
        // delete() with an out-of-range local id; refuse instead.
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }



    /**
     * Adds a document to this index.
     *
     * The document is handed to the index writer, after which the cached
     * document counter is increased by one.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        $writer = $this->getIndexWriter();
        $writer->addDocument($document);

        // Only reached when the writer accepted the document.
        ++$this->_docCount;
    }


    /**
     * Recompute the cached document counter as the sum of the document
     * counts of all segments.
     */
    private function _updateDocCount()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segment) {
            $this->_docCount = $this->_docCount + $segment->count();
        }
    }

    /**
     * Commit changes resulting from delete() or undeleteAll() operations.
     *
     * Pending per-segment changes are written out first; afterwards, if an
     * index writer has been created, it is committed as well and the cached
     * document counter is refreshed.
     *
     * @todo undeleteAll processing.
     */
    public function commit()
    {
        if ($this->_hasChanges) {
            foreach ($this->_segmentInfos as $segment) {
                $segment->writeChanges();
            }

            $this->_hasChanges = false;
        }

        if ($this->_writer === null) {
            // No writer was ever created, so there is nothing more to flush.
            return;
        }

        $this->_writer->commit();
        $this->_updateDocCount();
    }


    /**
     * Optimize index.
     *
     * Merges all segments into one. Pending changes are committed first; the
     * writer is only asked to optimize when there is more than one segment or
     * when some segment has deletions.
     */
    public function optimize()
    {
        // Make sure nothing is pending before deciding whether work is needed.
        $this->commit();

        $needsOptimization = count($this->_segmentInfos) > 1 || $this->hasDeletions();
        if (!$needsOptimization) {
            return;
        }

        $this->getIndexWriter()->optimize();
        $this->_updateDocCount();
    }


    /**
     * Returns an array of all terms in this index.
     *
     * Every segment's term stream is rewound and the segments are merged
     * through a priority queue. A term is emitted only when the next queue
     * entry carries a different term key, so a term present in several
     * segments is reported once.
     *
     * @return array
     */
    public function terms()
    {
        $queue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        foreach ($this->_segmentInfos as $segment) {
            $segment->reset();

            // Segments without any term never enter the queue.
            if ($segment->currentTerm() === null) {
                continue;
            }

            $queue->put($segment);
        }

        $terms = array();

        while (true) {
            $segment = $queue->pop();
            if ($segment === null) {
                break;
            }

            // Compare against the segment that would be popped next.
            $next      = $queue->top();
            $isNewTerm = ($next === null)
                || ($next->currentTerm()->key() != $segment->currentTerm()->key());

            if ($isNewTerm) {
                $terms[] = $segment->currentTerm();
            }

            $segment->nextTerm();

            // Re-queue the segment unless its term dictionary is exhausted.
            if ($segment->currentTerm() !== null) {
                $queue->put($segment);
            }
        }

        return $terms;
    }


    /*************************************************************************
    @todo UNIMPLEMENTED
    *************************************************************************/
    /**
     * Undeletes all documents currently marked as deleted in this index.
     *
     * Not implemented yet: the method body is empty, so calling it currently
     * has no effect and returns nothing.
     *
     * @todo Implementation
     */
    public function undeleteAll()
    {}
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?