lucene.php.svn-base

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· SVN-BASE 代码 · 共 1,038 行 · 第 1/2 页

SVN-BASE
1,038
字号
<?php/** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category   Zend * @package    Zend_Search_Lucene * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com) * @license    http://framework.zend.com/license/new-bsd     New BSD License *//** Zend_Search_Lucene_Exception */require_once 'Zend/Search/Lucene/Exception.php';/** Zend_Search_Lucene_Document */require_once 'Zend/Search/Lucene/Document.php';/** Zend_Search_Lucene_Document_Html */require_once 'Zend/Search/Lucene/Document/Html.php';/** Zend_Search_Lucene_Storage_Directory */require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';/** Zend_Search_Lucene_Storage_File_Memory */require_once 'Zend/Search/Lucene/Storage/File/Memory.php';/** Zend_Search_Lucene_Index_Term */require_once 'Zend/Search/Lucene/Index/Term.php';/** Zend_Search_Lucene_Index_TermInfo */require_once 'Zend/Search/Lucene/Index/TermInfo.php';/** Zend_Search_Lucene_Index_SegmentInfo */require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';/** Zend_Search_Lucene_Index_FieldInfo */require_once 'Zend/Search/Lucene/Index/FieldInfo.php';/** Zend_Search_Lucene_Index_Writer */require_once 'Zend/Search/Lucene/Index/Writer.php';/** Zend_Search_Lucene_Search_QueryParser */require_once 'Zend/Search/Lucene/Search/QueryParser.php';/** Zend_Search_Lucene_Search_QueryHit */require_once 'Zend/Search/Lucene/Search/QueryHit.php';/** Zend_Search_Lucene_Search_Similarity */require_once 'Zend/Search/Lucene/Search/Similarity.php';/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */require_once 
'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';


/**
 * Search index object: opened on (or created in) an index directory.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene
{
    /**
     * Name of the field searched by default.
     *
     * A null value means that all fields are searched.
     *
     * @var string
     */
    private static $_defaultSearchField = null;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;

    /**
     * Whether the directory adapter is closed when this object is destroyed.
     *
     * @var boolean
     */
    private $_closeDirOnExit = true;

    /**
     * Index writer; stays null until one is actually required.
     *
     * @var Zend_Search_Lucene_Index_Writer
     */
    private $_writer = null;

    /**
     * Zend_Search_Lucene_Index_SegmentInfo objects describing this index.
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Number of documents in this index.
     *
     * @var integer
     */
    private $_docCount = 0;

    /**
     * Flag for index changes
     *
     * @var boolean
     */
    private $_hasChanges = false;

    /**
     * Index lock object
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_lock;

    /**
     * Factory: build an index object in "create" mode.
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene
     */
    public static function create($directory)
    {
        return new self($directory, true);
    }

    /**
     * Factory: build an index object in "open" mode.
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene
     */
    public static function open($directory)
    {
        return new self($directory);
    }

    /**
     * Opens the index.
     *
     * IndexReader constructor needs Directory as a parameter. It should be
     * a string with a path to the index folder or a Directory object.
*
     * @param mixed   $directory path to the index folder, or a Directory object
     * @param boolean $create    true to create the index, false (default) to open it
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct($directory = null, $create = false)
    {
        if ($directory === null) {
            // Throw the exception type promised by @throws; it is also the only
            // exception class this file loads explicitly.
            // NOTE(review): assumes Zend_Search_Lucene_Exception extends
            // Zend_Search_Exception, so catch blocks for the base type keep working - confirm.
            throw new Zend_Search_Lucene_Exception('No index directory specified');
        }

        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
            // Directory object supplied by the caller: it is not closed by the destructor
            $this->_directory      = $directory;
            $this->_closeDirOnExit = false;
        } else {
            // Anything else is handed to the filesystem adapter, which this object then closes itself
            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
            $this->_closeDirOnExit = true;
        }

        // Get a shared lock to the index
        $this->_lock = $this->_directory->createFile('index.lock');

        $this->_segmentInfos = array();

        if ($create) {
            // Throw an exception if index is under processing now
            if (!$this->_lock->lock(LOCK_EX, true)) {
                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
            }

            // Writer will create segments file for empty segments list
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);

            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
            }
        } else {
            // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
            }
            $this->_writer = null;
        }

        $segmentsFile = $this->_directory->getFileObject('segments');

        $format = $segmentsFile->readInt();

        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // read version (skipped as two ints in place of a single readLong())
        $segmentsFile->readInt(); $segmentsFile->readInt();

        // read segment name counter
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        $this->_docCount = 0;

        // read segmentInfos
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();
            $this->_docCount += $segSize;

            $this->_segmentInfos[] =
                                new Zend_Search_Lucene_Index_SegmentInfo($segName,
                                                                         $segSize,
                                                                         $this->_directory);
        }
    }

    /**
     * Object destructor
     *
     * Commits the index, releases the index lock and, when this object
     * created the directory adapter itself, closes it.
     */
    public function __destruct()
    {
        $this->commit();

        // Free shared lock
        $this->_lock->unlock();

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }
    }

    /**
     * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
     *
     * The writer is created on first use and then reused.
     *
     * @return Zend_Search_Lucene_Index_Writer
     */
    public function getIndexWriter()
    {
        if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos);
        }

        return $this->_writer;
    }

    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     */
    public function getDirectory()
    {
        return $this->_directory;
    }

    /**
     * Returns the total number of documents in this index (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns one greater than the largest possible document number.
* This may be used to, e.g., determine how big to allocate a structure which will have
     * an element for every document number in an index.
     *
     * @return integer
     */
    public function maxDoc()
    {
        return $this->count();
    }

    /**
     * Returns the total number of non-deleted documents in this index.
     *
     * The value is the sum of numDocs() over all segments.
     *
     * @return integer
     */
    public function numDocs()
    {
        $numDocs = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $numDocs += $segmentInfo->numDocs();
        }

        return $numDocs;
    }

    /**
     * Checks, that document is deleted
     *
     * The index-wide id is mapped to the segment that contains it and the
     * question is delegated to that segment with a segment-local id.
     *
     * @param integer $id index-wide document id, 0 <= $id < count()
     * @return boolean
     * @throws Zend_Search_Lucene_Exception if $id is outside the valid range
     */
    public function isDeleted($id)
    {
        // Reject negative ids too: on an index without segments the loop below
        // would never assign $segmentInfo, and on a non-empty index a negative
        // offset would be passed to the first segment.
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentSize = $segmentInfo->count();

            // Stop at the first segment whose id range covers $id
            if ($segmentStartId + $segmentSize > $id) {
                break;
            }

            $segmentStartId += $segmentSize;
        }

        return $segmentInfo->isDeleted($id - $segmentStartId);
    }

    /**
     * Set default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * Default value is null
     *
     * @param string $fieldName
     */
    static public function setDefaultSearchField($fieldName)
    {
        self::$_defaultSearchField = $fieldName;
    }

    /**
     * Get default search field.
*
     * Null means, that search is performed through all fields by default
     *
     * @return string
     */
    public static function getDefaultSearchField()
    {
        return self::$_defaultSearchField;
    }

    /**
     * Read the maxBufferedDocs option from the index writer.
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMaxBufferedDocs()
    {
        $writer = $this->getIndexWriter();

        return $writer->maxBufferedDocs;
    }

    /**
     * Store the maxBufferedDocs option on the index writer.
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @param integer $maxBufferedDocs
     */
    public function setMaxBufferedDocs($maxBufferedDocs)
    {
        $writer = $this->getIndexWriter();
        $writer->maxBufferedDocs = $maxBufferedDocs;
    }

    /**
     * Read the maxMergeDocs option from the index writer.
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @return integer
     */
    public function getMaxMergeDocs()
    {
        $writer = $this->getIndexWriter();

        return $writer->maxMergeDocs;
    }

    /**
     * Set index maxMergeDocs option
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
*
     * Default value is PHP_INT_MAX
     *
     * @param integer $maxMergeDocs
     */
    public function setMaxMergeDocs($maxMergeDocs)
    {
        $writer = $this->getIndexWriter();
        $writer->maxMergeDocs = $maxMergeDocs;
    }

    /**
     * Read the mergeFactor option from the index writer.
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMergeFactor()
    {
        $writer = $this->getIndexWriter();

        return $writer->mergeFactor;
    }

    /**
     * Store the mergeFactor option on the index writer.
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @param integer $mergeFactor
     */
    public function setMergeFactor($mergeFactor)
    {
        $writer = $this->getIndexWriter();
        $writer->mergeFactor = $mergeFactor;
    }

    /**
     * Performs a query against the index and returns an array
     * of Zend_Search_Lucene_Search_QueryHit objects.
     * Input is a string or Zend_Search_Lucene_Search_Query.
*     * @param mixed $query     * @return array Zend_Search_Lucene_Search_QueryHit     * @throws Zend_Search_Lucene_Exception     */    public function find($query)    {        if (is_string($query)) {            $query = Zend_Search_Lucene_Search_QueryParser::parse($query);        }        if (!$query instanceof Zend_Search_Lucene_Search_Query) {            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');        }        $this->commit();        $hits   = array();        $scores = array();        $ids    = array();        $query = $query->rewrite($this)->optimize($this);        $query->execute($this);        $topScore = 0;        foreach ($query->matchedDocs() as $id => $num) {            $docScore = $query->score($id, $this);            if( $docScore != 0 ) {                $hit = new Zend_Search_Lucene_Search_QueryHit($this);                $hit->id = $id;                $hit->score = $docScore;                $hits[]   = $hit;                $ids[]    = $id;                $scores[] = $docScore;                if ($docScore > $topScore) {                    $topScore = $docScore;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?