lucene.php.svn-base

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 1,038 行 · 第 1/2 页

SVN-BASE
1,038
字号
                }
            }
        }

        if (count($hits) == 0) {
            // skip sorting, which may cause an error on empty index
            return array();
        }

        // Scale scores down only when the best one exceeds 1
        if ($topScore > 1) {
            $normalizedScores = array();
            foreach ($scores as $score) {
                $normalizedScores[] = $score/$topScore;
            }
            $scores = $normalizedScores;
        }

        if (func_num_args() == 1) {
            // sort by scores
            array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                            $ids,    SORT_ASC,  SORT_NUMERIC,
                            $hits);
        } else {
            // sort by given field names

            $argList    = func_get_args();
            $fieldNames = $this->getFieldNames();
            $sortArgs   = array();

            // Argument 0 is skipped; every following group is
            // "fieldName [, sortOrder [, sortType]]"
            for ($count = 1; $count < count($argList); $count++) {
                $fieldName = $argList[$count];

                if (!is_string($fieldName)) {
                    throw new Zend_Search_Lucene_Exception('Field name must be a string.');
                }

                if (!in_array($fieldName, $fieldNames)) {
                    throw new Zend_Search_Lucene_Exception('Wrong field name.');
                }

                // Collect the field value of every hit; a hit whose document
                // lacks the field contributes null
                $valuesArray = array();
                foreach ($hits as $hit) {
                    try {
                        $value = $hit->getDocument()->getFieldValue($fieldName);
                    } catch (Zend_Search_Lucene_Exception $e) {
                        if (strpos($e->getMessage(), 'not found') === false) {
                            throw $e;
                        } else {
                            $value = null;
                        }
                    }

                    $valuesArray[] = $value;
                }

                $sortArgs[] = $valuesArray;

                if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
                    $count++;
                    $sortArgs[] = $argList[$count];

                    if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
                        $count++;
                        $sortArgs[] = $argList[$count];
                    } else {
                        // Only one integer was given: supply the missing half
                        // (sort type if it was an order, order otherwise)
                        if ($argList[$count] == SORT_ASC  || $argList[$count] == SORT_DESC) {
                            $sortArgs[] = SORT_REGULAR;
                        } else {
                            $sortArgs[] = SORT_ASC;
                        }
                    }
                } else {
                    $sortArgs[] = SORT_ASC;
                    $sortArgs[] = SORT_REGULAR;
                }
            }

            // Sort by id's if values are equal
            $sortArgs[] = $ids;
            $sortArgs[] = SORT_ASC;
            $sortArgs[] = SORT_NUMERIC;

            // Array to be sorted
            $sortArgs[] = &$hits;

            // Do sort
            call_user_func_array('array_multisort', $sortArgs);
        }

        return $hits;
    }


    /**
     * Returns a list of all unique field names that exist in this index.
     *
     * The per-segment field lists are combined with array_merge().
     * NOTE(review): array_merge() only collapses duplicates for string keys,
     * so uniqueness depends on what getFields() returns - confirm against
     * the segment info implementation.
     *
     * @param boolean $indexed  passed through to each segment's getFields()
     * @return array
     */
    public function getFieldNames($indexed = false)
    {
        $result = array();
        foreach( $this->_segmentInfos as $segmentInfo ) {
            $result = array_merge($result, $segmentInfo->getFields($indexed));
        }
        return $result;
    }


    /**
     * Returns a Zend_Search_Lucene_Document object for the document
     * number $id in this index.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @return Zend_Search_Lucene_Document
     * @throws Zend_Search_Lucene_Exception  if $id is negative or not less
     *                                       than the document count
     */
    public function getDocument($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        // A negative id would otherwise resolve to the first segment and
        // produce a negative seek offset below
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        // Find the segment holding $id; $segmentStartId ends up as the
        // index-wide id of that segment's first document
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }

        // One long is read from .fdx at (segment-local id * 8); it is then
        // used as the seek offset into .fdt
        $fdxFile = $segmentInfo->openCompoundFile('.fdx');
        $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR );
        $fieldValuesPosition = $fdxFile->readLong();

        $fdtFile = $segmentInfo->openCompoundFile('.fdt');
        $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
        $fieldCount = $fdtFile->readVInt();

        $doc = new Zend_Search_Lucene_Document();
        for ($count = 0; $count < $fieldCount; $count++) {
            $fieldNum = $fdtFile->readVInt();
            $bits = $fdtFile->readByte();

            $fieldInfo = $segmentInfo->getField($fieldNum);

            // Bit 2 selects binary vs. text payload; bit 1 is forwarded to
            // the field constructor (presumably the "tokenized" flag - confirm)
            if (!($bits & 2)) { // Text data
                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                      $fdtFile->readString(),
                                                      'UTF-8',
                                                      true,
                                                      $fieldInfo->isIndexed,
                                                      $bits & 1 );
            } else {            // Binary data
                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                      $fdtFile->readBinary(),
                                                      '',
                                                      true,
                                                      $fieldInfo->isIndexed,
                                                      $bits & 1,
                                                      true );
            }

            $doc->addField($field);
        }

        return $doc;
    }


    /**
     * Returns true if index contain documents with specified term.
     *
     * Is used for query optimization.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return boolean
     */
    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
    {
        foreach ($this->_segmentInfos as $segInfo) {
            if ($segInfo->getTermInfo($term) instanceof Zend_Search_Lucene_Index_TermInfo) {
                return true;
            }
        }

        return false;
    }


    /**
     * Returns an array of all the documents which contain term.
     *
     * The returned ids are index-wide: each segment-local id is offset by
     * the document counts of the preceding segments.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;

        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);

            if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                // Term is absent from this segment; still advance the offset
                $segmentStartDocId += $segInfo->count();
                continue;
            }

            $frqFile = $segInfo->openCompoundFile('.frq');
            $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
            $docId = 0;
            for( $count=0; $count < $termInfo->docFreq; $count++ ) {
                // Doc ids are delta-encoded in the upper bits; an even value
                // is followed by an explicit frequency VInt
                $docDelta = $frqFile->readVInt();
                if( $docDelta % 2 == 1 ) {
                    $docId += ($docDelta-1)/2;
                } else {
                    $docId += $docDelta/2;
                    // read freq
                    $frqFile->readVInt();
                }

                $result[] = $segmentStartDocId + $docId;
            }

            $segmentStartDocId += $segInfo->count();
        }

        return $result;
    }


    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;

        foreach( $this->_segmentInfos as $segInfo ) {
            $termInfo = $segInfo->getTermInfo($term);

            if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                // Term is absent from this segment; still advance the offset
                $segmentStartDocId += $segInfo->count();
                continue;
            }

            // First pass (.frq): segment-local doc id => term frequency
            $frqFile = $segInfo->openCompoundFile('.frq');
            $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
            $freqs = array();
            $docId = 0;
            for( $count = 0; $count < $termInfo->docFreq; $count++ ) {
                $docDelta = $frqFile->readVInt();
                if( $docDelta % 2 == 1 ) {
                    // Odd value: frequency of one, no explicit count stored
                    $docId += ($docDelta-1)/2;
                    $freqs[ $docId ] = 1;
                } else {
                    $docId += $docDelta/2;
                    $freqs[ $docId ] = $frqFile->readVInt();
                }
            }

            // Second pass (.prx): positions are delta-encoded per document
            $prxFile = $segInfo->openCompoundFile('.prx');
            $prxFile->seek($termInfo->proxPointer,SEEK_CUR);
            foreach ($freqs as $docId => $freq) {
                $termPosition = 0;
                $positions = array();

                for ($count = 0; $count < $freq; $count++ ) {
                    $termPosition += $prxFile->readVInt();
                    $positions[] = $termPosition;
                }

                $result[ $segmentStartDocId + $docId ] = $positions;
            }

            $segmentStartDocId += $segInfo->count();
        }

        return $result;
    }


    /**
     * Returns the number of documents in this index containing the $term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $result = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);
            // Same presence test as hasTerm()/termDocs()/termPositions()
            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                $result += $termInfo->docFreq;
            }
        }

        return $result;
    }


    /**
     * Retrieve similarity used by index reader
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public function getSimilarity()
    {
        return Zend_Search_Lucene_Search_Similarity::getDefault();
    }


    /**
     * Returns a normalization factor for "field, document" pair.
     *
     * Returns null when $id is outside the index (negative or not less than
     * the document count) and 0 when the document is marked as deleted.
     *
     * @param integer $id
     * @param string $fieldName
     * @return float|null
     */
    public function norm( $id, $fieldName )
    {
        if ($id < 0 || $id >= $this->_docCount) {
            return null;
        }

        // Find the segment holding $id
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            if ($segmentStartId + $segInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segInfo->count();
        }

        if ($segInfo->isDeleted($id - $segmentStartId)) {
            return 0;
        }

        return $segInfo->norm($id - $segmentStartId, $fieldName);
    }


    /**
     * Returns true if any documents have been deleted from this index.
     *
     * @return boolean
     */
    public function hasDeletions()
    {
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentInfo->hasDeletions()) {
                return true;
            }
        }

        return false;
    }


    /**
     * Deletes a document from the index.
     * $id is an internal document id
     *
     * The deletion is recorded on the owning segment and flagged as a pending
     * change; commit() writes it out.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @throws Zend_Search_Lucene_Exception  if $id is negative or not less
     *                                       than the document count
     */
    public function delete($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        // A negative id would otherwise be forwarded to the first segment
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        // Find the segment holding $id
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }
        $segmentInfo->delete($id - $segmentStartId);

        $this->_hasChanges = true;
    }


    /**
     * Adds a document to this index.
     *
     * The document is handed to the index writer and the cached document
     * count is incremented immediately.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        $this->getIndexWriter()->addDocument($document);
        $this->_docCount++;
    }


    /**
     * Update document counter
     *
     * Recomputes the cached count as the sum of all segment counts.
     */
    private function _updateDocCount()
    {
        $this->_docCount = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $this->_docCount += $segInfo->count();
        }
    }


    /**
     * Commit changes resulting from delete() or undeleteAll() operations.
     *
     * Also commits the index writer, if one has been created, and refreshes
     * the cached document count afterwards.
     *
     * @todo undeleteAll processing.
     */
    public function commit()
    {
        if ($this->_hasChanges) {
            foreach ($this->_segmentInfos as $segInfo) {
                $segInfo->writeChanges();
            }

            $this->_hasChanges = false;
        }

        if ($this->_writer !== null) {
            $this->_writer->commit();

            $this->_updateDocCount();
        }
    }


    /**
     * Optimize index.
     *
     * Merges all segments into one
     */
    public function optimize()
    {
        // Commit changes if any changes have been made
        $this->commit();

        // Nothing to do for a single segment without deletions
        if (count($this->_segmentInfos) > 1 || $this->hasDeletions()) {
            $this->getIndexWriter()->optimize();
            $this->_updateDocCount();
        }
    }


    /**
     * Returns an array of all terms in this index.
     *
     * Segment term streams are merged through a priority queue; a term is
     * emitted only when the next queued segment does not carry the same key,
     * so each term key appears once in the result.
     *
     * @return array
     */
    public function terms()
    {
        $result = array();

        $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentInfo->reset();

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            if ($segmentInfoQueue->top() === null ||
                $segmentInfoQueue->top()->currentTerm()->key() !=
                            $segmentInfo->currentTerm()->key()) {
                // We got new term
                $result[] = $segmentInfo->currentTerm();
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        return $result;
    }


    /*************************************************************************
    @todo UNIMPLEMENTED
    *************************************************************************/

    /**
     * Undeletes all documents currently marked as deleted in this index.
     *
     * Currently a no-op.
     *
     * @todo Implementation
     */
    public function undeleteAll()
    {}
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?