lucene.php
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 1,038 行 · 第 1/3 页
PHP
1,038 行
return $doc;
}
/**
 * Tells whether at least one segment of the index knows the given term.
 *
 * A segment "knows" the term when its getTermInfo() lookup yields a
 * Zend_Search_Lucene_Index_TermInfo object. Is used for query optimization.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    foreach ($this->_segmentInfos as $segment) {
        $lookup = $segment->getTermInfo($term);

        if (!($lookup instanceof Zend_Search_Lucene_Index_TermInfo)) {
            // Nothing in this segment, try the next one
            continue;
        }

        // First hit is enough
        return true;
    }

    return false;
}
/**
 * Collects the ids of all documents which contain the term.
 *
 * Returned ids are index-wide: the ids read from a segment are shifted by
 * the summed count() of all segments that precede it. This method itself
 * performs no deletion check on the collected ids.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term)
{
    $matches     = array();
    $segmentBase = 0;

    foreach ($this->_segmentInfos as $segment) {
        $info = $segment->getTermInfo($term);

        if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Position the frequency stream on this term's entries
            // (the seek is relative to where openCompoundFile() leaves the handle)
            $stream = $segment->openCompoundFile('.frq');
            $stream->seek($info->freqPointer, SEEK_CUR);

            $localId = 0;
            $entry   = 0;
            while ($entry < $info->docFreq) {
                $encoded = $stream->readVInt();

                if ($encoded % 2 == 1) {
                    // Odd value: doc delta only, no frequency value follows
                    $localId += ($encoded - 1) / 2;
                } else {
                    // Even value: doc delta followed by a frequency,
                    // which is read and thrown away here
                    $localId += $encoded / 2;
                    $stream->readVInt();
                }

                $matches[] = $segmentBase + $localId;
                $entry++;
            }
        }

        // Whether the term was found or not, the next segment starts
        // after all documents of this one
        $segmentBase += $segment->count();
    }

    return $matches;
}
/**
 * Collects the positions of the term inside every document containing it.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Document ids used as keys are index-wide (segment-local id plus the
 * summed count() of the preceding segments). Positions are stored as
 * deltas and are accumulated separately for each document.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return array
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term)
{
    $positionsByDoc = array();
    $segmentBase    = 0;

    foreach ($this->_segmentInfos as $segment) {
        $info = $segment->getTermInfo($term);

        if ($info instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Pass 1: read "local doc id => term frequency" pairs
            $freqStream = $segment->openCompoundFile('.frq');
            $freqStream->seek($info->freqPointer, SEEK_CUR);

            $frequencyOf = array();
            $localId     = 0;
            $entry       = 0;
            while ($entry < $info->docFreq) {
                $encoded = $freqStream->readVInt();

                if ($encoded % 2 == 1) {
                    // Odd value: the frequency is implicitly 1
                    $localId += ($encoded - 1) / 2;
                    $frequencyOf[ $localId ] = 1;
                } else {
                    // Even value: the frequency is stored in the next VInt
                    $localId += $encoded / 2;
                    $frequencyOf[ $localId ] = $freqStream->readVInt();
                }

                $entry++;
            }

            // Pass 2: read as many position deltas per document as its frequency says
            $proxStream = $segment->openCompoundFile('.prx');
            $proxStream->seek($info->proxPointer, SEEK_CUR);

            foreach ($frequencyOf as $documentId => $occurrences) {
                $current = 0;
                $found   = array();

                for ($read = 0; $read < $occurrences; $read++) {
                    $current += $proxStream->readVInt();
                    $found[]  = $current;
                }

                $positionsByDoc[ $segmentBase + $documentId ] = $found;
            }
        }

        // Next segment's ids start after all documents of this one
        $segmentBase += $segment->count();
    }

    return $positionsByDoc;
}
/**
 * Sums up, over all segments, the docFreq value recorded for the $term.
 *
 * Segments whose getTermInfo() lookup gives null contribute nothing.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_segmentInfos as $segment) {
        $info = $segment->getTermInfo($term);

        if ($info === null) {
            // Term is unknown to this segment
            continue;
        }

        $total += $info->docFreq;
    }

    return $total;
}
/**
 * Retrieve the similarity used by the index reader.
 *
 * Always hands back whatever Zend_Search_Lucene_Search_Similarity::getDefault()
 * returns.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    $default = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $default;
}
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * The index-wide document id is translated into a segment and a
 * segment-local id by walking the segments in order and accumulating
 * their count() values.
 *
 * Returns null when the id cannot be resolved: it is negative, it is not
 * below the document counter, or no segment covers it (the document
 * counter is incremented by addDocument() and only recomputed from the
 * segments on commit(), so it may be ahead of the segment list).
 * Returns 0 when the owning segment reports the document as deleted.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|integer|null
 */
public function norm( $id, $fieldName )
{
    if ($id < 0 || $id >= $this->_docCount) {
        return null;
    }

    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segInfo) {
        $segmentEndId = $segmentStartId + $segInfo->count();

        if ($id < $segmentEndId) {
            // This segment owns the document
            $localId = $id - $segmentStartId;

            if ($segInfo->isDeleted($localId)) {
                return 0;
            }

            return $segInfo->norm($localId, $fieldName);
        }

        $segmentStartId = $segmentEndId;
    }

    // No segment covers this id. Do not fall back to the last segment
    // with an out-of-range local id.
    return null;
}
/**
 * Tells whether any segment of this index reports deletions.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $anyDeleted = false;

    foreach ($this->_segmentInfos as $segment) {
        if ($segment->hasDeletions()) {
            // One segment with deletions is enough, stop scanning
            $anyDeleted = true;
            break;
        }
    }

    return $anyDeleted;
}
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * When a query hit is passed, its id property is used. The index-wide id
 * is translated into a segment and a segment-local id by walking the
 * segments in order and accumulating their count() values; the deletion is
 * delegated to the owning segment and the index is flagged as changed so
 * that commit() writes the change.
 *
 * An exception is thrown when the id is negative, is not below the
 * document counter, or is not covered by any segment (the document counter
 * is incremented by addDocument() and only recomputed from the segments on
 * commit(), so it may be ahead of the segment list).
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception
 */
public function delete($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    if ($id < 0 || $id >= $this->_docCount) {
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        $segmentEndId = $segmentStartId + $segmentInfo->count();

        if ($id < $segmentEndId) {
            // This segment owns the document
            $segmentInfo->delete($id - $segmentStartId);
            $this->_hasChanges = true;
            return;
        }

        $segmentStartId = $segmentEndId;
    }

    // No segment covers this id. Deleting "id - total" in the last segment
    // would remove an unrelated document, so refuse instead.
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Hands a document over to the index writer and bumps the document counter.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $writer = $this->getIndexWriter();
    $writer->addDocument($document);

    // Counter is adjusted right away
    $this->_docCount += 1;
}
/**
 * Recompute the document counter as the sum of count() over all segments.
 */
private function _updateDocCount()
{
    // Start from scratch, then add up segment by segment
    $this->_docCount = 0;

    foreach ($this->_segmentInfos as $segment) {
        $this->_docCount = $this->_docCount + $segment->count();
    }
}
/**
 * Commit changes resulting from delete() or undeleteAll() operations.
 *
 * If the index is flagged as changed, every segment is asked to write its
 * changes and the flag is cleared. Afterwards, if an index writer has been
 * created, it is committed too and the document counter is recomputed.
 *
 * @todo undeleteAll processing.
 */
public function commit()
{
    $pendingChanges = $this->_hasChanges;

    if ($pendingChanges) {
        foreach ($this->_segmentInfos as $segment) {
            $segment->writeChanges();
        }

        $this->_hasChanges = false;
    }

    if ($this->_writer === null) {
        // No writer was ever created, nothing more to flush
        return;
    }

    $this->_writer->commit();
    $this->_updateDocCount();
}
/**
 * Optimize index.
 *
 * Merges all segments into one. Pending changes are committed first; the
 * writer is only asked to optimize when there is more than one segment or
 * when some segment reports deletions.
 */
public function optimize()
{
    // Flush whatever is pending before looking at the segments
    $this->commit();

    $worthMerging = count($this->_segmentInfos) > 1 || $this->hasDeletions();

    if (!$worthMerging) {
        return;
    }

    $writer = $this->getIndexWriter();
    $writer->optimize();

    $this->_updateDocCount();
}
/**
 * Returns an array of all terms in this index.
 *
 * The term streams of all segments are merged through a priority queue.
 * A term is appended to the result only when the queue is empty afterwards
 * or the next queue head carries a different term key, so a run of equal
 * keys coming from several segments yields a single entry.
 *
 * @return array
 */
public function terms()
{
    $collected = array();
    $queue     = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

    // Rewind every segment and enqueue those which have at least one term
    foreach ($this->_segmentInfos as $segment) {
        $segment->reset();

        if ($segment->currentTerm() === null) {
            // "Empty" segment, nothing to merge
            continue;
        }

        $queue->put($segment);
    }

    while (true) {
        $head = $queue->pop();
        if ($head === null) {
            // Queue is drained
            break;
        }

        $isLastOfItsKey = $queue->top() === null
            || $queue->top()->currentTerm()->key() != $head->currentTerm()->key();

        if ($isLastOfItsKey) {
            $collected[] = $head->currentTerm();
        }

        // Advance the segment; re-enqueue it unless its dictionary is finished
        $head->nextTerm();
        if ($head->currentTerm() !== null) {
            $queue->put($head);
        }
    }

    return $collected;
}
/*************************************************************************
@todo UNIMPLEMENTED
*************************************************************************/
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * Not implemented yet: the method body is empty, so calling it currently
 * does nothing.
 *
 * @todo Implementation
 */
public function undeleteAll()
{}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?