lucene.php
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 1,038 行 · 第 1/3 页
PHP
1,038 行
* Default value is null
*
* @param string $fieldName
*/
public static function setDefaultSearchField($fieldName)
{
    // Held in a static property, so the value applies class-wide rather
    // than to a single index object.
    self::$_defaultSearchField = $fieldName;
}
/**
 * Get the default search field.
 *
 * A null value means that searches are performed through all fields
 * by default.
 *
 * @return string
 */
public static function getDefaultSearchField()
{
    return self::$_defaultSearchField;
}
/**
 * Retrieve the index maxBufferedDocs option.
 *
 * maxBufferedDocs is the minimal number of documents required before
 * the buffered in-memory documents are written into a new segment.
 *
 * Default value is 10
 *
 * @return integer
 */
public function getMaxBufferedDocs()
{
    // The option lives on the index writer; this method only reads it.
    $writer = $this->getIndexWriter();

    return $writer->maxBufferedDocs;
}
/**
 * Set the index maxBufferedDocs option.
 *
 * maxBufferedDocs is the minimal number of documents required before
 * the buffered in-memory documents are written into a new segment.
 *
 * Default value is 10
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    // The option lives on the index writer; this method only forwards it.
    $writer = $this->getIndexWriter();
    $writer->maxBufferedDocs = $maxBufferedDocs;
}
/**
 * Retrieve the index maxMergeDocs option.
 *
 * maxMergeDocs is the largest number of documents ever merged by
 * addDocument(). Small values (e.g., less than 10,000) are best for
 * interactive indexing, as this limits the length of pauses while indexing
 * to a few seconds. Larger values are best for batched indexing and
 * speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @return integer
 */
public function getMaxMergeDocs()
{
    // The option lives on the index writer; this method only reads it.
    $writer = $this->getIndexWriter();

    return $writer->maxMergeDocs;
}
/**
 * Set the index maxMergeDocs option.
 *
 * maxMergeDocs is the largest number of documents ever merged by
 * addDocument(). Small values (e.g., less than 10,000) are best for
 * interactive indexing, as this limits the length of pauses while indexing
 * to a few seconds. Larger values are best for batched indexing and
 * speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    // The option lives on the index writer; this method only forwards it.
    $writer = $this->getIndexWriter();
    $writer->maxMergeDocs = $maxMergeDocs;
}
/**
 * Retrieve the index mergeFactor option.
 *
 * mergeFactor determines how often segment indices are merged by
 * addDocument().
 * With smaller values, less RAM is used while indexing, and searches on
 * unoptimized indices are faster, but indexing speed is slower.
 * With larger values, more RAM is used during indexing, and while searches
 * on unoptimized indices are slower, indexing is faster.
 * Thus larger values (> 10) are best for batch index creation, and smaller
 * values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * @return integer
 */
public function getMergeFactor()
{
    // The option lives on the index writer; this method only reads it.
    $writer = $this->getIndexWriter();

    return $writer->mergeFactor;
}
/**
* Set index mergeFactor option
*
* mergeFactor determines how often segment indices are merged by addDocument().
* With smaller values, less RAM is used while indexing,
* and searches on unoptimized indices are faster,
* but indexing speed is slower.
* With larger values, more RAM is used during indexing,
* and while searches on unoptimized indices are slower,
* indexing is faster.
* Thus larger values (> 10) are best for batch index creation,
* and smaller values (< 10) for indices that are interactively maintained.
*
* Default value is 10
*
* The value is assigned to the mergeFactor property of the index writer.
*
* @param integer $mergeFactor
*/
public function setMergeFactor($mergeFactor)
{
$this->getIndexWriter()->mergeFactor = $mergeFactor;
}
/**
 * Performs a query against the index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Pending changes are committed before the query is executed. Documents
 * whose score is 0 are not reported. When the best score exceeds 1, all
 * hit scores are divided by it so that the top hit has a score of 1.
 *
 * Any arguments passed after $query select the sort order. They are read as
 * groups of: a field name (string), optionally followed by up to two integer
 * array_multisort() flags (sort order and/or sort type). A missing order
 * defaults to SORT_ASC and a missing type to SORT_REGULAR. Hits with equal
 * field values are ordered by ascending document id.
 * Without extra arguments hits are ordered by descending score, then by
 * ascending document id.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception  if $query has a wrong type, a sort
 *                                       field name is not a string, or the
 *                                       field is unknown to the index
 */
public function find($query)
{
    if (is_string($query)) {
        $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
    }

    if (!$query instanceof Zend_Search_Lucene_Search_Query) {
        throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
    }

    $this->commit();

    // $hits, $scores and $ids are parallel arrays (same index => same document).
    $hits   = array();
    $scores = array();
    $ids    = array();

    $query = $query->rewrite($this)->optimize($this);
    $query->execute($this);

    $topScore = 0;

    foreach ($query->matchedDocs() as $id => $num) {
        $docScore = $query->score($id, $this);
        if ($docScore != 0) {
            $hit = new Zend_Search_Lucene_Search_QueryHit($this);
            $hit->id    = $id;
            $hit->score = $docScore;

            $hits[]   = $hit;
            $ids[]    = $id;
            $scores[] = $docScore;

            if ($docScore > $topScore) {
                $topScore = $docScore;
            }
        }
    }

    if (count($hits) == 0) {
        // skip sorting, which may cause a error on empty index
        return array();
    }

    if ($topScore > 1) {
        // Normalize both the sort keys and the scores exposed on the hits.
        // Previously only the local sort keys were normalized, so callers
        // still received raw scores greater than 1.
        foreach ($hits as $index => $hit) {
            $scores[$index] = $scores[$index] / $topScore;
            $hit->score     = $scores[$index];
        }
    }

    if (func_num_args() == 1) {
        // sort by scores
        array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                        $ids,    SORT_ASC,  SORT_NUMERIC,
                        $hits);

        return $hits;
    }

    // sort by given field names
    $argList    = func_get_args();
    $argCount   = count($argList);
    $fieldNames = $this->getFieldNames();

    // array_multisort() takes its arguments by reference. When it is invoked
    // through call_user_func_array(), every entry is therefore stored as a
    // reference to a variable that stays alive until the call; plain values
    // are warned about or refused by PHP versions that enforce this.
    $sortAsc     = SORT_ASC;
    $sortRegular = SORT_REGULAR;
    $sortNumeric = SORT_NUMERIC;
    $sortColumns = array();
    $sortArgs    = array();

    for ($count = 1; $count < $argCount; $count++) {
        $fieldName = $argList[$count];

        if (!is_string($fieldName)) {
            throw new Zend_Search_Lucene_Exception('Field name must be a string.');
        }

        if (!in_array($fieldName, $fieldNames)) {
            throw new Zend_Search_Lucene_Exception('Wrong field name.');
        }

        // Load the sort key of every hit; a document lacking the field
        // sorts as null.
        $valuesArray = array();
        foreach ($hits as $hit) {
            try {
                $value = $hit->getDocument()->getFieldValue($fieldName);
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'not found') === false) {
                    throw $e;
                } else {
                    $value = null;
                }
            }

            $valuesArray[] = $value;
        }

        // Keep each column in its own slot so that the reference handed to
        // array_multisort() is not overwritten by the next sort field.
        $columnIndex               = count($sortColumns);
        $sortColumns[$columnIndex] = $valuesArray;
        $sortArgs[]                = &$sortColumns[$columnIndex];

        if ($count + 1 < $argCount && is_integer($argList[$count + 1])) {
            // First explicit flag (either an order or a type).
            $count++;
            $sortArgs[] = &$argList[$count];

            if ($count + 1 < $argCount && is_integer($argList[$count + 1])) {
                // Second explicit flag.
                $count++;
                $sortArgs[] = &$argList[$count];
            } else {
                // Only one flag given: supply the default for the other kind.
                if ($argList[$count] == SORT_ASC || $argList[$count] == SORT_DESC) {
                    $sortArgs[] = &$sortRegular;
                } else {
                    $sortArgs[] = &$sortAsc;
                }
            }
        } else {
            $sortArgs[] = &$sortAsc;
            $sortArgs[] = &$sortRegular;
        }
    }

    // Sort by id's if values are equal
    $sortArgs[] = &$ids;
    $sortArgs[] = &$sortAsc;
    $sortArgs[] = &$sortNumeric;

    // Array to be sorted
    $sortArgs[] = &$hits;

    // Do sort
    call_user_func_array('array_multisort', $sortArgs);

    return $hits;
}
/**
 * Returns the field names that exist in this index, gathered by merging
 * the field lists of all its segments.
 *
 * NOTE(review): whether the merged list is free of duplicates depends on
 * how each segment's getFields() keys its result - confirm.
 *
 * @param boolean $indexed  passed through unchanged to each segment's getFields()
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $fieldNames = array();

    foreach ($this->_segmentInfos as $segment) {
        $segmentFields = $segment->getFields($indexed);
        $fieldNames    = array_merge($fieldNames, $segmentFields);
    }

    return $fieldNames;
}
/**
* Returns a Zend_Search_Lucene_Document object for the document
* number $id in this index.
*
* @param integer|Zend_Search_Lucene_Search_QueryHit $id
* @return Zend_Search_Lucene_Document
*/
public function getDocument($id)
{
if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
/* @var $id Zend_Search_Lucene_Search_QueryHit */
$id = $id->id;
}
if ($id >= $this->_docCount) {
throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
$segmentStartId = 0;
foreach ($this->_segmentInfos as $segmentInfo) {
if ($segmentStartId + $segmentInfo->count() > $id) {
break;
}
$segmentStartId += $segmentInfo->count();
}
$fdxFile = $segmentInfo->openCompoundFile('.fdx');
$fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR );
$fieldValuesPosition = $fdxFile->readLong();
$fdtFile = $segmentInfo->openCompoundFile('.fdt');
$fdtFile->seek($fieldValuesPosition, SEEK_CUR);
$fieldCount = $fdtFile->readVInt();
$doc = new Zend_Search_Lucene_Document();
for ($count = 0; $count < $fieldCount; $count++) {
$fieldNum = $fdtFile->readVInt();
$bits = $fdtFile->readByte();
$fieldInfo = $segmentInfo->getField($fieldNum);
if (!($bits & 2)) { // Text data
$field = new Zend_Search_Lucene_Field($fieldInfo->name,
$fdtFile->readString(),
'UTF-8',
true,
$fieldInfo->isIndexed,
$bits & 1 );
} else { // Binary data
$field = new Zend_Search_Lucene_Field($fieldInfo->name,
$fdtFile->readBinary(),
'',
true,
$fieldInfo->isIndexed,
$bits & 1,
true );
}
$doc->addField($field);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?