📄 lucene.php
字号:
$fdtFile->seek($fieldValuesPosition, SEEK_CUR);
        $fieldCount = $fdtFile->readVInt();

        $doc = new Zend_Search_Lucene_Document();
        for ($count = 0; $count < $fieldCount; $count++) {
            $fieldNum = $fdtFile->readVInt();
            $bits = $fdtFile->readByte();

            $fieldInfo = $segmentInfo->getField($fieldNum);

            if (!($bits & 2)) { // Text data
                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                      $fdtFile->readString(),
                                                      'UTF-8',
                                                      true,
                                                      $fieldInfo->isIndexed,
                                                      $bits & 1 );
            } else {            // Binary data
                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                      $fdtFile->readBinary(),
                                                      '',
                                                      true,
                                                      $fieldInfo->isIndexed,
                                                      $bits & 1,
                                                      true );
            }

            $doc->addField($field);
        }

        return $doc;
    }

    /**
     * Tells whether at least one segment of the index knows the given term.
     *
     * Is used for query optimization.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return boolean
     */
    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
    {
        foreach ($this->_segmentInfos as $segment) {
            $found = $segment->getTermInfo($term);

            if ($found instanceof Zend_Search_Lucene_Index_TermInfo) {
                return true;
            }
        }

        return false;
    }

    /**
     * Collects the index-wide ids of all documents containing the term.
     *
     * Ids stored in a segment are relative to that segment; the number of
     * documents in all preceding segments is added to make them index-wide.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term)
    {
        $docIds = array();
        $baseId = 0;

        foreach ($this->_segmentInfos as $segment) {
            $info = $segment->getTermInfo($term);

            if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
                $frq = $segment->openCompoundFile('.frq');
                $frq->seek($info->freqPointer,SEEK_CUR);

                // Document ids are stored as deltas against the previous entry
                $localId = 0;
                for ($entry = 0; $entry < $info->docFreq; ++$entry) {
                    $packed = $frq->readVInt();

                    if ($packed % 2 != 1) {
                        $localId += $packed/2;
                        // a separate frequency value follows; read it to move past it
                        $frq->readVInt();
                    } else {
                        // odd value: no frequency entry follows in the stream
                        $localId += ($packed-1)/2;
                    }

                    $docIds[] = $baseId + $localId;
                }
            }

            // Segments that do not contain the term still shift the ids of later segments
            $baseId += $segment->count();
        }

        return $docIds;
    }

    /**
     * Returns an array of all term freqs.
     * Result array structure: array(docId => freq, ...)
*
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termFreqs(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            // Array union: each segment is handed the id of its first document
            // as the offset for the keys it produces.
            $result += $segmentInfo->termFreqs($term, $segmentStartDocId);

            $segmentStartDocId += $segmentInfo->count();
        }

        return $result;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            // Array union: each segment is handed the id of its first document
            // as the offset for the keys it produces.
            $result += $segmentInfo->termPositions($term, $segmentStartDocId);

            $segmentStartDocId += $segmentInfo->count();
        }

        return $result;
    }

    /**
     * Returns the number of documents in this index containing the $term.
     *
     * The per-segment document frequencies are summed up.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $result = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);
            // Same "term found" test as hasTerm() and termDocs()
            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                $result += $termInfo->docFreq;
            }
        }

        return $result;
    }

    /**
     * Retrieve similarity used by index reader
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public function getSimilarity()
    {
        return Zend_Search_Lucene_Search_Similarity::getDefault();
    }

    /**
     * Returns a normalization factor for "field, document" pair.
     *
     * @param integer $id
     * @param string $fieldName
     * @return float|integer|null  null if $id does not address a document of
     *                             the loaded segments, 0 if the document is
     *                             marked as deleted, otherwise the value
     *                             reported by the owning segment
     */
    public function norm($id, $fieldName)
    {
        if ($id < 0 || $id >= $this->_docCount) {
            return null;
        }

        // Locate the segment owning $id and translate $id into a segment-local id
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $segmentSize = $segInfo->count();

            if ($id < $segmentStartId + $segmentSize) {
                $localId = $id - $segmentStartId;

                if ($segInfo->isDeleted($localId)) {
                    return 0;
                }

                return $segInfo->norm($localId, $fieldName);
            }

            $segmentStartId += $segmentSize;
        }

        // No loaded segment contains this id: report it like any other
        // out-of-range id instead of querying the last segment with a bogus offset.
        return null;
    }

    /**
     * Returns true if any documents have been deleted from this index.
*
     * @return boolean
     */
    public function hasDeletions()
    {
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentInfo->hasDeletions()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Deletes a document from the index.
     * $id is an internal document id
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @throws Zend_Search_Lucene_Exception  if $id does not address a document
     *                                       of the loaded segments
     */
    public function delete($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        // Reject ids on both sides of the valid range
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        // Locate the segment owning $id and translate $id into a segment-local id
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentSize = $segmentInfo->count();

            if ($id < $segmentStartId + $segmentSize) {
                $segmentInfo->delete($id - $segmentStartId);
                $this->_hasChanges = true;
                return;
            }

            $segmentStartId += $segmentSize;
        }

        // No loaded segment contains this id: fail explicitly instead of
        // marking a bogus offset in the last segment as deleted.
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    /**
     * Adds a document to this index.
     *
     * The document is handed to the index writer; the document counter is
     * incremented and the index is flagged as changed.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        $this->getIndexWriter()->addDocument($document);
        $this->_docCount++;

        $this->_hasChanges = true;
    }

    /**
     * Update document counter
     *
     * Recomputes the counter as the sum of the document counts of all segments.
     */
    private function _updateDocCount()
    {
        $this->_docCount = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $this->_docCount += $segInfo->count();
        }
    }

    /**
     * Commit changes resulting from delete() or undeleteAll() operations.
     *
     * Does nothing unless the index has been flagged as changed.
     *
     * @todo undeleteAll processing.
     */
    public function commit()
    {
        if ($this->_hasChanges) {
            foreach ($this->_segmentInfos as $segInfo) {
                $segInfo->writeChanges();
            }

            $this->getIndexWriter()->commit();

            $this->_updateDocCount();

            $this->_hasChanges = false;
        }
    }

    /**
     * Optimize index.
     *
     * Merges all segments into one
     */
    public function optimize()
    {
        // Commit changes if any changes have been made
        $this->commit();

        // Only invoke the writer when there is more than one segment or there are deletions
        if (count($this->_segmentInfos) > 1 || $this->hasDeletions()) {
            $this->getIndexWriter()->optimize();
            $this->_updateDocCount();
        }
    }

    /**
     * Returns an array of all terms in this index.
*
     * The term streams of all segments are merged through a priority queue;
     * a term reported by several segments is listed only once.
     *
     * @return array
     */
    public function terms()
    {
        $queue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        foreach ($this->_segmentInfos as $segment) {
            $segment->reset();

            // Segments without a current term have nothing to contribute
            if ($segment->currentTerm() === null) {
                continue;
            }

            $queue->put($segment);
        }

        $terms = array();

        for ($segment = $queue->pop(); $segment !== null; $segment = $queue->pop()) {
            $isNewTerm = $queue->top() === null
                      || $queue->top()->currentTerm()->key() != $segment->currentTerm()->key();

            if ($isNewTerm) {
                // The segment now on top of the queue does not carry the same term
                $terms[] = $segment->currentTerm();
            }

            // Re-queue the segment as long as it has further terms
            if ($segment->nextTerm() !== null) {
                $queue->put($segment);
            }
        }

        return $terms;
    }


    /**
     * Priority queue of the segments feeding the terms stream
     *
     * @var Zend_Search_Lucene_Index_SegmentInfoPriorityQueue
     */
    private $_termsStreamQueue = null;

    /**
     * Term the terms stream is currently positioned at
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_lastTerm = null;


    /**
     * Reset terms stream.
     *
     * Creates a fresh queue, resets every segment, queues the segments that
     * have a current term and moves the stream to its first term.
     */
    public function resetTermsStream()
    {
        $queue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
        $this->_termsStreamQueue = $queue;

        foreach ($this->_segmentInfos as $segment) {
            $segment->reset();

            // Segments without a current term have nothing to contribute
            if ($segment->currentTerm() === null) {
                continue;
            }

            $queue->put($segment);
        }

        $this->nextTerm();
    }

    /**
     * Skip terms stream up to specified term prefix.
*
     * Prefix contains fully specified field info and portion of searched term
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     * @throws Zend_Search_Lucene_Exception  if the terms stream has not been
     *                                       opened with resetTermsStream()
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        // The queue only exists between resetTermsStream() and closeTermsStream()
        if ($this->_termsStreamQueue === null) {
            throw new Zend_Search_Lucene_Exception('Terms stream is not initialized.');
        }

        // Drain the queue first: every segment is repositioned, so the queue
        // has to be rebuilt from the segments' new current terms.
        $segments = array();

        while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
            $segments[] = $segmentInfo;
        }

        foreach ($segments as $segmentInfo) {
            $segmentInfo->skipTo($prefix);

            if ($segmentInfo->currentTerm() !== null) {
                $this->_termsStreamQueue->put($segmentInfo);
            }
        }

        $this->nextTerm();
    }

    /**
     * Scans terms dictionary and returns next term
     *
     * Returns null at the end of the stream, and also when the stream has
     * not been opened or has already been closed.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        // No open stream: behave like an exhausted stream instead of
        // calling a method on null.
        if ($this->_termsStreamQueue === null) {
            $this->_lastTerm = null;
            return null;
        }

        while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
            if ($this->_termsStreamQueue->top() === null ||
                $this->_termsStreamQueue->top()->currentTerm()->key() !=
                            $segmentInfo->currentTerm()->key()) {
                // We got new term
                $this->_lastTerm = $segmentInfo->currentTerm();

                if ($segmentInfo->nextTerm() !== null) {
                    // Put segment back into the priority queue
                    $this->_termsStreamQueue->put($segmentInfo);
                }

                return $this->_lastTerm;
            }

            // Same term is also on top of the queue: advance this segment and keep scanning
            if ($segmentInfo->nextTerm() !== null) {
                // Put segment back into the priority queue
                $this->_termsStreamQueue->put($segmentInfo);
            }
        }

        // End of stream
        $this->_lastTerm = null;

        return null;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end.
     * Calling it when no stream is open is a no-op.
     */
    public function closeTermsStream()
    {
        if ($this->_termsStreamQueue !== null) {
            while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
                $segmentInfo->closeTermsStream();
            }
        }

        $this->_termsStreamQueue = null;
        $this->_lastTerm         = null;
    }


    /*************************************************************************
    @todo UNIMPLEMENTED
    *************************************************************************/
    /**
     * Undeletes all documents currently marked as deleted in this index.
*
     * @todo Implementation
     */
    public function undeleteAll()
    {
        // Not implemented yet: the body is empty, so calling this method has no effect.
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -