lucene.php.svn-base
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 1,038 行 · 第 1/2 页
SVN-BASE
1,038 行
} } } if (count($hits) == 0) { // skip sorting, which may cause a error on empty index return array(); } if ($topScore > 1) { $normalizedScores = array(); foreach ($scores as $score) { $normalizedScores[] = $score/$topScore; } $scores = $normalizedScores; } if (func_num_args() == 1) { // sort by scores array_multisort($scores, SORT_DESC, SORT_NUMERIC, $ids, SORT_ASC, SORT_NUMERIC, $hits); } else { // sort by given field names $argList = func_get_args(); $fieldNames = $this->getFieldNames(); $sortArgs = array(); for ($count = 1; $count < count($argList); $count++) { $fieldName = $argList[$count]; if (!is_string($fieldName)) { throw new Zend_Search_Lucene_Exception('Field name must be a string.'); } if (!in_array($fieldName, $fieldNames)) { throw new Zend_Search_Lucene_Exception('Wrong field name.'); } $valuesArray = array(); foreach ($hits as $hit) { try { $value = $hit->getDocument()->getFieldValue($fieldName); } catch (Zend_Search_Lucene_Exception $e) { if (strpos($e->getMessage(), 'not found') === false) { throw $e; } else { $value = null; } } $valuesArray[] = $value; } $sortArgs[] = $valuesArray; if ($count + 1 < count($argList) && is_integer($argList[$count+1])) { $count++; $sortArgs[] = $argList[$count]; if ($count + 1 < count($argList) && is_integer($argList[$count+1])) { $count++; $sortArgs[] = $argList[$count]; } else { if ($argList[$count] == SORT_ASC || $argList[$count] == SORT_DESC) { $sortArgs[] = SORT_REGULAR; } else { $sortArgs[] = SORT_ASC; } } } else { $sortArgs[] = SORT_ASC; $sortArgs[] = SORT_REGULAR; } } // Sort by id's if values are equal $sortArgs[] = $ids; $sortArgs[] = SORT_ASC; $sortArgs[] = SORT_NUMERIC; // Array to be sorted $sortArgs[] = &$hits; // Do sort call_user_func_array('array_multisort', $sortArgs); } return $hits; } /** * Returns a list of all unique field names that exist in this index. 
*
     * @param boolean $indexed Forwarded unchanged to each segment's getFields() call
     * @return array Field lists of all segments combined with array_merge()
     */
    public function getFieldNames($indexed = false)
    {
        $result = array();

        foreach ($this->_segmentInfos as $segmentInfo) {
            $result = array_merge($result, $segmentInfo->getFields($indexed));
        }

        return $result;
    }

    /**
     * Returns a Zend_Search_Lucene_Document object for the document
     * number $id in this index.
     *
     * The document is rebuilt from the stored-field files of the segment that
     * owns $id: the '.fdx' file is read at (local id * 8) to obtain a long,
     * which is then used as the offset into the '.fdt' file where the field
     * count and the field entries are read.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id Index-wide document
     *        number, or a query hit whose public "id" property is used instead
     * @return Zend_Search_Lucene_Document
     * @throws Zend_Search_Lucene_Exception When $id is negative or not below
     *         the index document count
     */
    public function getDocument($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        // A negative id used to slip past the upper-bound check and produce a
        // negative seek offset into the '.fdx' file; reject it the same way as
        // an id that is too large.
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        // Find the segment containing $id. After the loop $segmentInfo is that
        // segment and $segmentStartId is the index-wide id of its first document.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }

        // One 8-byte entry per document in '.fdx'; the value read there is the
        // position of the document's field data inside '.fdt'.
        $fdxFile = $segmentInfo->openCompoundFile('.fdx');
        $fdxFile->seek(($id - $segmentStartId) * 8, SEEK_CUR);
        $fieldValuesPosition = $fdxFile->readLong();

        $fdtFile = $segmentInfo->openCompoundFile('.fdt');
        $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
        $fieldCount = $fdtFile->readVInt();

        $doc = new Zend_Search_Lucene_Document();
        for ($count = 0; $count < $fieldCount; $count++) {
            $fieldNum  = $fdtFile->readVInt();
            $bits      = $fdtFile->readByte();
            $fieldInfo = $segmentInfo->getField($fieldNum);

            // Flag value 2 selects between a string payload and a binary one.
            // NOTE(review): ($bits & 1) is passed as the sixth constructor
            // argument - presumably the "tokenized" flag; confirm against
            // Zend_Search_Lucene_Field.
            if (!($bits & 2)) {
                // Text data
                $field = new Zend_Search_Lucene_Field(
                    $fieldInfo->name,
                    $fdtFile->readString(),
                    'UTF-8',
                    true,
                    $fieldInfo->isIndexed,
                    $bits & 1
                );
            } else {
                // Binary data
                $field = new Zend_Search_Lucene_Field(
                    $fieldInfo->name,
                    $fdtFile->readBinary(),
                    '',
                    true,
                    $fieldInfo->isIndexed,
                    $bits & 1,
                    true
                );
            }

            $doc->addField($field);
        }

        return $doc;
    }

    /**
     * Returns true if index contain documents with specified term.
     *
     * Is used for query optimization.
*
     * @param Zend_Search_Lucene_Index_Term $term Term to look up in every segment
     * @return boolean True as soon as one segment yields a TermInfo for $term
     */
    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
    {
        $found = false;

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                $found = true;
                break;
            }
        }

        return $found;
    }

    /**
     * Collects the index-wide ids of every document that contains $term.
     *
     * For each segment that knows the term, docFreq entries are read from the
     * segment's '.frq' file starting at the term's freqPointer. Each entry is
     * a VInt delta: an odd value advances the document id by (delta - 1) / 2
     * and is not followed by a frequency; an even value advances it by
     * delta / 2 and is followed by one more VInt, which is read and discarded.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array List of document ids, segment offset already added
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term)
    {
        $docIds      = array();
        $segmentBase = 0; // index-wide id of the current segment's first document

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                $frq = $segment->openCompoundFile('.frq');
                $frq->seek($info->freqPointer, SEEK_CUR);

                $localId = 0;
                for ($n = 0; $n < $info->docFreq; $n++) {
                    $delta = $frq->readVInt();

                    if ($delta % 2 == 1) {
                        $localId += ($delta - 1) / 2;
                    } else {
                        $localId += $delta / 2;
                        // read freq
                        $frq->readVInt();
                    }

                    $docIds[] = $segmentBase + $localId;
                }
            }

            // Advance the base whether or not this segment contained the term.
            $segmentBase += $segment->count();
        }

        return $docIds;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
*
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array Positions keyed by index-wide document id
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term)
    {
        $positionsByDoc = array();
        $segmentBase    = 0; // index-wide id of the current segment's first document

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                // Pass 1: read the '.frq' entries to learn, per document, how
                // many positions have to be read from '.prx' afterwards.
                $frq = $segment->openCompoundFile('.frq');
                $frq->seek($info->freqPointer, SEEK_CUR);

                $freqByDoc = array();
                $localId   = 0;
                for ($n = 0; $n < $info->docFreq; $n++) {
                    $delta = $frq->readVInt();

                    if ($delta % 2 == 1) {
                        // Odd delta: frequency is 1 and no extra value follows.
                        $localId += ($delta - 1) / 2;
                        $freqByDoc[$localId] = 1;
                    } else {
                        // Even delta: the frequency is the next VInt.
                        $localId += $delta / 2;
                        $freqByDoc[$localId] = $frq->readVInt();
                    }
                }

                // Pass 2: positions are stored as deltas, so keep a running sum
                // that restarts at zero for every document.
                $prx = $segment->openCompoundFile('.prx');
                $prx->seek($info->proxPointer, SEEK_CUR);

                foreach ($freqByDoc as $docInSegment => $freq) {
                    $position  = 0;
                    $positions = array();

                    for ($n = 0; $n < $freq; $n++) {
                        $position += $prx->readVInt();
                        $positions[] = $position;
                    }

                    $positionsByDoc[$segmentBase + $docInSegment] = $positions;
                }
            }

            // Advance the base whether or not this segment contained the term.
            $segmentBase += $segment->count();
        }

        return $positionsByDoc;
    }

    /**
     * Sums, over all segments, the docFreq of the TermInfo found for $term.
     *
     * Segments whose getTermInfo() returns null contribute nothing.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $total = 0;

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);
            if ($info === null) {
                continue;
            }

            $total += $info->docFreq;
        }

        return $total;
    }

    /**
     * Retrieve similarity used by index reader
     *
     * Always hands back the default similarity object.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public function getSimilarity()
    {
        $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

        return $similarity;
    }

    /**
     * Returns a normalization factor for "field, document" pair.
*
     * @param integer $id        Index-wide document number
     * @param string  $fieldName Field whose norm is requested
     * @return float|null Null when $id lies outside the index, 0 when the
     *         document is marked deleted in its segment, otherwise the value
     *         returned by the owning segment's norm()
     */
    public function norm($id, $fieldName)
    {
        // Negative ids are treated like any other out-of-range id; previously
        // they reached the segment with a negative local offset.
        if ($id < 0 || $id >= $this->_docCount) {
            return null;
        }

        // Find the segment containing $id; $segmentStartId ends up as the
        // index-wide id of that segment's first document.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            if ($segmentStartId + $segInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segInfo->count();
        }

        if ($segInfo->isDeleted($id - $segmentStartId)) {
            return 0;
        }

        return $segInfo->norm($id - $segmentStartId, $fieldName);
    }

    /**
     * Returns true if any documents have been deleted from this index.
     *
     * @return boolean True when at least one segment reports deletions
     */
    public function hasDeletions()
    {
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentInfo->hasDeletions()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Deletes a document from the index.
     * $id is an internal document id
     *
     * The deletion is forwarded to the segment that owns the document and the
     * index is flagged as changed; commit() checks that flag before calling
     * writeChanges() on the segments.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id Index-wide document
     *        number, or a query hit whose public "id" property is used instead
     * @throws Zend_Search_Lucene_Exception When $id is negative or not below
     *         the index document count
     */
    public function delete($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        // A negative id used to pass the upper-bound check and was handed to
        // the first segment as a negative local offset; reject it instead.
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }

        $segmentInfo->delete($id - $segmentStartId);
        $this->_hasChanges = true;
    }

    /**
     * Adds a document to this index.
     *
     * The document is passed to the index writer and the cached document
     * count is incremented by one.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        $this->getIndexWriter()->addDocument($document);
        $this->_docCount++;
    }

    /**
     * Update document counter
     *
     * Recomputes $this->_docCount as the sum of count() over all segments.
     */
    private function _updateDocCount()
    {
        $this->_docCount = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $this->_docCount += $segInfo->count();
        }
    }

    /**
     * Commit changes resulting from delete() or undeleteAll() operations.
     *
     * @todo undeleteAll processing.
*/
    public function commit()
    {
        // Step 1: flush pending per-segment changes, if any were flagged.
        if ($this->_hasChanges) {
            foreach ($this->_segmentInfos as $segment) {
                $segment->writeChanges();
            }

            $this->_hasChanges = false;
        }

        // Step 2: only when a writer exists, commit it and refresh the
        // cached document count.
        if ($this->_writer === null) {
            return;
        }

        $this->_writer->commit();
        $this->_updateDocCount();
    }

    /**
     * Optimize index.
     *
     * Commits first, then asks the index writer to optimize when there is
     * more than one segment or any segment reports deletions.
     */
    public function optimize()
    {
        // Commit changes if any changes have been made
        $this->commit();

        $needsOptimize = count($this->_segmentInfos) > 1 || $this->hasDeletions();
        if (!$needsOptimize) {
            return;
        }

        $this->getIndexWriter()->optimize();
        $this->_updateDocCount();
    }

    /**
     * Returns an array of all terms in this index.
     *
     * Every segment is reset and, unless it has no current term, placed in a
     * priority queue. Segments are then popped one at a time; the popped
     * segment's current term is emitted only when the queue is empty or the
     * segment now on top holds a term with a different key, so a term present
     * in several segments is emitted once.
     *
     * @return array
     */
    public function terms()
    {
        $allTerms = array();
        $queue    = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        foreach ($this->_segmentInfos as $segment) {
            $segment->reset();

            // Skip "empty" segments
            if ($segment->currentTerm() !== null) {
                $queue->put($segment);
            }
        }

        for (;;) {
            $segment = $queue->pop();
            if ($segment === null) {
                break;
            }

            $isNewTerm = $queue->top() === null
                || $queue->top()->currentTerm()->key() != $segment->currentTerm()->key();
            if ($isNewTerm) {
                // We got new term
                $allTerms[] = $segment->currentTerm();
            }

            $segment->nextTerm();

            // check, if segment dictionary is finished
            if ($segment->currentTerm() !== null) {
                // Put segment back into the priority queue
                $queue->put($segment);
            }
        }

        return $allTerms;
    }

    /*************************************************************************
    @todo UNIMPLEMENTED
    *************************************************************************/

    /**
     * Undeletes all documents currently marked as deleted in this index.
     *
     * Currently an empty method body.
     *
     * @todo Implementation
     */
    public function undeleteAll()
    {
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?