lucene.php.svn-base
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 1,038 行 · 第 1/2 页
SVN-BASE
1,038 行
<?php/** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category Zend * @package Zend_Search_Lucene * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License *//** Zend_Search_Lucene_Exception */require_once 'Zend/Search/Lucene/Exception.php';/** Zend_Search_Lucene_Document */require_once 'Zend/Search/Lucene/Document.php';/** Zend_Search_Lucene_Document_Html */require_once 'Zend/Search/Lucene/Document/Html.php';/** Zend_Search_Lucene_Storage_Directory */require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';/** Zend_Search_Lucene_Storage_File_Memory */require_once 'Zend/Search/Lucene/Storage/File/Memory.php';/** Zend_Search_Lucene_Index_Term */require_once 'Zend/Search/Lucene/Index/Term.php';/** Zend_Search_Lucene_Index_TermInfo */require_once 'Zend/Search/Lucene/Index/TermInfo.php';/** Zend_Search_Lucene_Index_SegmentInfo */require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';/** Zend_Search_Lucene_Index_FieldInfo */require_once 'Zend/Search/Lucene/Index/FieldInfo.php';/** Zend_Search_Lucene_Index_Writer */require_once 'Zend/Search/Lucene/Index/Writer.php';/** Zend_Search_Lucene_Search_QueryParser */require_once 'Zend/Search/Lucene/Search/QueryParser.php';/** Zend_Search_Lucene_Search_QueryHit */require_once 'Zend/Search/Lucene/Search/QueryHit.php';/** Zend_Search_Lucene_Search_Similarity */require_once 'Zend/Search/Lucene/Search/Similarity.php';/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */require_once 
'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene
{
    /**
     * Name of the field searched by default.
     *
     * A null value means the search goes through all fields.
     *
     * @var string
     */
    private static $_defaultSearchField = null;

    /**
     * Directory (file system adapter) that holds the index files.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;

    /**
     * Whether the directory adapter is closed when this object is destroyed.
     *
     * @var boolean
     */
    private $_closeDirOnExit = true;

    /**
     * Index writer; stays null until one is actually needed.
     *
     * @var Zend_Search_Lucene_Index_Writer
     */
    private $_writer = null;

    /**
     * Segment descriptors of this index.
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Total number of documents in this index.
     *
     * @var integer
     */
    private $_docCount = 0;

    /**
     * Set when the index has pending changes.
     *
     * @var boolean
     */
    private $_hasChanges = false;

    /**
     * File object used as the index lock.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_lock;

    /**
     * Create a new index in the given directory.
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene
     */
    public static function create($directory)
    {
        // Second constructor argument switches the constructor into "create" mode.
        return new self($directory, true);
    }

    /**
     * Open an existing index located in the given directory.
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene
     */
    public static function open($directory)
    {
        return new self($directory);
    }

    /**
     * Opens the index.
     *
     * IndexReader constructor needs Directory as a parameter. It should be
     * a string with a path to the index folder or a Directory object.
*
     * @param mixed   $directory path to the index folder, or a
     *                           Zend_Search_Lucene_Storage_Directory_Filesystem instance
     * @param boolean $create    when true, a new (empty) index is created through the writer
     * @throws Zend_Search_Exception        if no directory is given
     * @throws Zend_Search_Lucene_Exception if a lock can't be obtained or the
     *                                      segments file has an unexpected format
     */
    public function __construct($directory = null, $create = false)
    {
        if ($directory === null) {
            throw new Zend_Search_Exception('No index directory specified');
        }

        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
            // Caller owns the directory object, so it is not closed on destruction.
            $this->_directory      = $directory;
            $this->_closeDirOnExit = false;
        } else {
            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
            $this->_closeDirOnExit = true;
        }

        // Get a shared lock to the index
        $this->_lock = $this->_directory->createFile('index.lock');

        $this->_segmentInfos = array();

        if ($create) {
            // Throw an exception if index is under processing now
            if (!$this->_lock->lock(LOCK_EX, true)) {
                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
            }

            // Writer will create segments file for empty segments list
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);

            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
            }
        } else {
            // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
            }
            $this->_writer = null;
        }

        $segmentsFile = $this->_directory->getFileObject('segments');

        $format = $segmentsFile->readInt();

        // NOTE(review): (int)0xFFFFFFFF is platform dependent (integer width);
        // confirm it matches what readInt() returns on the target platform.
        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // read version: two readInt() calls are used in place of a single readLong()
        $segmentsFile->readInt();
        $segmentsFile->readInt();

        // read segment name counter
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        $this->_docCount = 0;

        // read segmentInfos
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();
            $this->_docCount += $segSize;

            $this->_segmentInfos[] = new Zend_Search_Lucene_Index_SegmentInfo(
                $segName,
                $segSize,
                $this->_directory
            );
        }
    }

    /**
     * Object destructor
     *
     * Commits the index, releases the index lock and, if this object created
     * the directory adapter itself, closes it.
     */
    public function __destruct()
    {
        $this->commit();

        // Free shared lock.
        // PHP runs the destructor even when the constructor threw, in which
        // case the lock (and possibly the directory) was never set up.
        if ($this->_lock !== null) {
            $this->_lock->unlock();
        }

        if ($this->_closeDirOnExit && $this->_directory !== null) {
            $this->_directory->close();
        }
    }

    /**
     * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
     *
     * The writer is created on first use and reused afterwards.
     *
     * @return Zend_Search_Lucene_Index_Writer
     */
    public function getIndexWriter()
    {
        if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos);
        }

        return $this->_writer;
    }

    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     */
    public function getDirectory()
    {
        return $this->_directory;
    }

    /**
     * Returns the total number of documents in this index (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns one greater than the largest possible document number.
     * This may be used to, e.g., determine how big to allocate a structure which will have
     * an element for every document number in an index.
     *
     * @return integer
     */
    public function maxDoc()
    {
        return $this->count();
    }

    /**
     * Returns the total number of non-deleted documents in this index.
*
     * @return integer
     */
    public function numDocs()
    {
        $numDocs = 0;

        // Sum the per-segment counts of non-deleted documents.
        foreach ($this->_segmentInfos as $segmentInfo) {
            $numDocs += $segmentInfo->numDocs();
        }

        return $numDocs;
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer $id index-wide document number
     * @return boolean
     * @throws Zend_Search_Lucene_Exception if $id is negative or not less than the document count
     */
    public function isDeleted($id)
    {
        // Reject ids on both sides of the valid range; a negative id would
        // otherwise be handed to the first segment as a negative local id.
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        // Walk the segments until the one containing $id is found;
        // $segmentInfo keeps pointing at that segment after the break.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }

        // Translate the index-wide id into an id local to the segment.
        return $segmentInfo->isDeleted($id - $segmentStartId);
    }

    /**
     * Set default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * Default value is null
     *
     * @param string $fieldName
     */
    public static function setDefaultSearchField($fieldName)
    {
        self::$_defaultSearchField = $fieldName;
    }

    /**
     * Get default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * @return string
     */
    public static function getDefaultSearchField()
    {
        return self::$_defaultSearchField;
    }

    /**
     * Retrieve index maxBufferedDocs option
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMaxBufferedDocs()
    {
        return $this->getIndexWriter()->maxBufferedDocs;
    }

    /**
     * Set index maxBufferedDocs option
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @param integer $maxBufferedDocs
     */
    public function setMaxBufferedDocs($maxBufferedDocs)
    {
        $this->getIndexWriter()->maxBufferedDocs = $maxBufferedDocs;
    }

    /**
     * Retrieve index maxMergeDocs option
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
* Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @return integer
     */
    public function getMaxMergeDocs()
    {
        $writer = $this->getIndexWriter();

        return $writer->maxMergeDocs;
    }

    /**
     * Change the maxMergeDocs option of the index writer.
     *
     * maxMergeDocs is the largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) suit interactive indexing, because
     * they keep indexing pauses down to a few seconds; larger values suit
     * batched indexing and give speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @param integer $maxMergeDocs
     */
    public function setMaxMergeDocs($maxMergeDocs)
    {
        $writer = $this->getIndexWriter();
        $writer->maxMergeDocs = $maxMergeDocs;
    }

    /**
     * Read the mergeFactor option from the index writer.
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * Smaller values use less RAM while indexing and make searches on
     * unoptimized indices faster, at the price of slower indexing.
     * Larger values use more RAM and slow down searches on unoptimized
     * indices, but indexing itself is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMergeFactor()
    {
        $writer = $this->getIndexWriter();

        return $writer->mergeFactor;
    }

    /**
     * Set index mergeFactor option
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
* Thus larger values (> 10) are best for batch index creation, * and smaller values (< 10) for indices that are interactively maintained. * * Default value is 10 * * @param integer $mergeFactor */ public function setMergeFactor($mergeFactor) { $this->getIndexWriter()->mergeFactor = $mergeFactor; } /** * Performs a query against the index and returns an array * of Zend_Search_Lucene_Search_QueryHit objects. * Input is a string or Zend_Search_Lucene_Search_Query. * * @param mixed $query * @return array Zend_Search_Lucene_Search_QueryHit * @throws Zend_Search_Lucene_Exception */ public function find($query) { if (is_string($query)) { $query = Zend_Search_Lucene_Search_QueryParser::parse($query); } if (!$query instanceof Zend_Search_Lucene_Search_Query) { throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object'); } $this->commit(); $hits = array(); $scores = array(); $ids = array(); $query = $query->rewrite($this)->optimize($this); $query->execute($this); $topScore = 0; foreach ($query->matchedDocs() as $id => $num) { $docScore = $query->score($id, $this); if( $docScore != 0 ) { $hit = new Zend_Search_Lucene_Search_QueryHit($this); $hit->id = $id; $hit->score = $docScore; $hits[] = $hit; $ids[] = $id; $scores[] = $docScore; if ($docScore > $topScore) { $topScore = $docScore;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?