📄 lucene.php
字号:
} $this->_directory = null; $this->_writer = null; $this->_segmentInfos = null; $this->_closed = true; }

    /**
     * Registers one more holder of this index object by incrementing
     * the reference counter.
     *
     * @internal
     */
    public function addReference()
    {
        ++$this->_refCount;
    }

    /**
     * Releases one holder of this index object.
     *
     * The reference counter is decremented; as soon as it reaches zero
     * the index is closed through _close().
     *
     * @internal
     */
    public function removeReference()
    {
        if (--$this->_refCount == 0) {
            $this->_close();
        }
    }

    /**
     * Object destructor; delegates to _close().
     */
    public function __destruct()
    {
        $this->_close();
    }

    /**
     * Returns the Zend_Search_Lucene_Index_Writer bound to this index.
     *
     * The writer is created on first use from the index directory and the
     * segment info list, then kept in $this->_writer and reused afterwards.
     *
     * @internal
     * @return Zend_Search_Lucene_Index_Writer
     */
    public function getIndexWriter()
    {
        // Reuse the writer when one has already been created.
        if ($this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
            return $this->_writer;
        }

        $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos);

        return $this->_writer;
    }

    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance this index works on.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     */
    public function getDirectory()
    {
        return $this->_directory;
    }

    /**
     * Returns the total number of documents in this index; deleted documents
     * are included in the figure.
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns one greater than the largest possible document number.
     *
     * Handy for sizing a structure that needs one slot per document number.
     * Currently this is simply the value of count().
     *
     * @return integer
     */
    public function maxDoc()
    {
        return $this->count();
    }

    /**
     * Returns the total number of non-deleted documents in this index.
*
     * @return integer
     */
    public function numDocs()
    {
        $numDocs = 0;

        // Each segment reports its own count; the index total is their sum.
        foreach ($this->_segmentInfos as $segmentInfo) {
            $numDocs += $segmentInfo->numDocs();
        }

        return $numDocs;
    }

    /**
     * Checks whether the document with the given number is marked as deleted.
     *
     * The index-wide document number is mapped onto the segment that contains it
     * (segments are walked in order, accumulating their sizes) and the segment is
     * then asked about the segment-local offset.
     *
     * @param integer $id
     * @return boolean
     * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
     */
    public function isDeleted($id)
    {
        // A negative number is just as invalid as one past the end; without this
        // check it would be handed to the first segment as a negative offset.
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            // Stop at the first segment whose id range reaches past $id;
            // $segmentInfo keeps pointing at that segment after the loop.
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }

        return $segmentInfo->isDeleted($id - $segmentStartId);
    }

    /**
     * Set default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * Default value is null
     *
     * @param string $fieldName
     */
    public static function setDefaultSearchField($fieldName)
    {
        self::$_defaultSearchField = $fieldName;
    }

    /**
     * Get default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * @return string
     */
    public static function getDefaultSearchField()
    {
        return self::$_defaultSearchField;
    }

    /**
     * Set result set limit.
     *
     * 0 (default) means no limit
     *
     * @param integer $limit
     */
    public static function setResultSetLimit($limit)
    {
        self::$_resultSetLimit = $limit;
    }

    /**
     * Get result set limit.
*
     * 0 means no limit
     *
     * @return integer
     */
    public static function getResultSetLimit()
    {
        return self::$_resultSetLimit;
    }

    /**
     * Reads the maxBufferedDocs option from the index writer.
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMaxBufferedDocs()
    {
        $writer = $this->getIndexWriter();

        return $writer->maxBufferedDocs;
    }

    /**
     * Stores the maxBufferedDocs option on the index writer.
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @param integer $maxBufferedDocs
     */
    public function setMaxBufferedDocs($maxBufferedDocs)
    {
        $writer = $this->getIndexWriter();
        $writer->maxBufferedDocs = $maxBufferedDocs;
    }

    /**
     * Reads the maxMergeDocs option from the index writer.
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @return integer
     */
    public function getMaxMergeDocs()
    {
        $writer = $this->getIndexWriter();

        return $writer->maxMergeDocs;
    }

    /**
     * Stores the maxMergeDocs option on the index writer.
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @param integer $maxMergeDocs
     */
    public function setMaxMergeDocs($maxMergeDocs)
    {
        $writer = $this->getIndexWriter();
        $writer->maxMergeDocs = $maxMergeDocs;
    }

    /**
     * Retrieve index mergeFactor option
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
* With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMergeFactor()
    {
        $writer = $this->getIndexWriter();

        return $writer->mergeFactor;
    }

    /**
     * Stores the mergeFactor option on the index writer.
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @param integer $mergeFactor
     */
    public function setMergeFactor($mergeFactor)
    {
        $writer = $this->getIndexWriter();
        $writer->mergeFactor = $mergeFactor;
    }

    /**
     * Performs a query against the index and returns an array
     * of Zend_Search_Lucene_Search_QueryHit objects.
     * Input is a string or Zend_Search_Lucene_Search_Query.
*
     * Every argument after $query is read as a sort instruction: a field name
     * (string), optionally followed by up to two integer array_multisort() flags.
     * When no extra arguments are given, hits are ordered by descending score and,
     * for equal scores, by ascending document id.
     *
     * Hits are collected in the order matchedDocs() yields them and collection
     * stops once the result set limit (if non-zero) is reached, i.e. before sorting.
     * Scores are divided by the top score when that top score exceeds 1.
     *
     * @param mixed $query
     * @return array Zend_Search_Lucene_Search_QueryHit
     * @throws Zend_Search_Lucene_Exception
     */
    public function find($query)
    {
        if (is_string($query)) {
            $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
        }

        if (!($query instanceof Zend_Search_Lucene_Search_Query)) {
            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
        }

        $this->commit();

        $hits   = array();
        $scores = array();
        $ids    = array();

        $query = $query->rewrite($this)->optimize($this);
        $query->execute($this);

        $topScore = 0;
        foreach ($query->matchedDocs() as $docId => $unused) {
            $score = $query->score($docId, $this);

            // Documents scoring exactly zero are not reported as hits.
            if ($score != 0) {
                $hit        = new Zend_Search_Lucene_Search_QueryHit($this);
                $hit->id    = $docId;
                $hit->score = $score;

                $hits[]   = $hit;
                $ids[]    = $docId;
                $scores[] = $score;

                if ($score > $topScore) {
                    $topScore = $score;
                }
            }

            // The limit is checked on every iteration, whether or not a hit was added.
            if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) {
                break;
            }
        }

        if (empty($hits)) {
            // skip sorting, which may cause a error on empty index
            return array();
        }

        // Normalize so that the best hit gets score 1; $scores keeps the raw
        // values, which does not affect the ordering below.
        if ($topScore > 1) {
            foreach ($hits as $hit) {
                $hit->score /= $topScore;
            }
        }

        if (func_num_args() == 1) {
            // sort by scores
            array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                            $ids,    SORT_ASC,  SORT_NUMERIC,
                            $hits);

            return $hits;
        }

        // sort by given field names
        $argList    = func_get_args();
        $argCount   = count($argList);
        $fieldNames = $this->getFieldNames();
        $sortArgs   = array();

        for ($pos = 1; $pos < $argCount; $pos++) {
            $fieldName = $argList[$pos];

            if (!is_string($fieldName)) {
                throw new Zend_Search_Lucene_Exception('Field name must be a string.');
            }

            if (!in_array($fieldName, $fieldNames)) {
                throw new Zend_Search_Lucene_Exception('Wrong field name.');
            }

            // One sort column: the field's value for every hit (null when the
            // document lacks the field).
            $valuesArray = array();
            foreach ($hits as $hit) {
                try {
                    $value = $hit->getDocument()->getFieldValue($fieldName);
                } catch (Zend_Search_Lucene_Exception $e) {
                    if (strpos($e->getMessage(), 'not found') === false) {
                        throw $e;
                    }

                    $value = null;
                }

                $valuesArray[] = $value;
            }

            $sortArgs[] = $valuesArray;

            $flagFollows = ($pos + 1 < $argCount) && is_integer($argList[$pos + 1]);
            if (!$flagFollows) {
                // No flags supplied for this field: fall back to both defaults.
                $sortArgs[] = SORT_ASC;
                $sortArgs[] = SORT_REGULAR;
                continue;
            }

            // First flag.
            $pos++;
            $sortArgs[] = $argList[$pos];

            if ($pos + 1 < $argCount && is_integer($argList[$pos + 1])) {
                // Second flag supplied explicitly.
                $pos++;
                $sortArgs[] = $argList[$pos];
            } elseif ($argList[$pos] == SORT_ASC || $argList[$pos] == SORT_DESC) {
                // Only an order flag was given; complete it with the default type.
                $sortArgs[] = SORT_REGULAR;
            } else {
                // Only a type flag was given; complete it with the default order.
                $sortArgs[] = SORT_ASC;
            }
        }

        // Sort by id's if values are equal
        $sortArgs[] = $ids;
        $sortArgs[] = SORT_ASC;
        $sortArgs[] = SORT_NUMERIC;

        // Array to be sorted; stored by reference so that array_multisort()
        // reorders $hits itself rather than a copy held in $sortArgs.
        $sortArgs[] = &$hits;

        // Do sort
        call_user_func_array('array_multisort', $sortArgs);

        return $hits;
    }

    /**
     * Collects the field names reported by every segment of this index.
     *
     * The per-segment getFields() results are combined with array_merge().
     * NOTE(review): whether duplicate names collapse depends on getFields()
     * returning name-keyed arrays, which is not visible here - confirm.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFieldNames($indexed = false)
    {
        $names = array();

        foreach ($this->_segmentInfos as $segment) {
            $names = array_merge($names, $segment->getFields($indexed));
        }

        return $names;
    }

    /** * Returns a Zend_Search_Lucene_Document object for the document * number $id in this index. * * @param integer|Zend_Search_Lucene_Search_QueryHit $id * @return Zend_Search_Lucene_Document */ public function getDocument($id) { if ($id instanceof Zend_Search_Lucene_Search_QueryHit) { /* @var $id Zend_Search_Lucene_Search_QueryHit */ $id = $id->id; } if ($id >= $this->_docCount) { throw new Zend_Search_Lucene_Exception('Document id is out of the range.'); } $segmentStartId = 0; foreach ($this->_segmentInfos as $segmentInfo) { if ($segmentStartId + $segmentInfo->count() > $id) { break; } $segmentStartId += $segmentInfo->count(); } $fdxFile = $segmentInfo->openCompoundFile('.fdx'); $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR ); $fieldValuesPosition = $fdxFile->readLong(); $fdtFile = $segmentInfo->openCompoundFile('.fdt');
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -