📄 lucene.php
字号:
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

/** Zend_Search_Lucene_Exception */
require_once "Zend/Search/Lucene/Exception.php";

/** Zend_Search_Lucene_Document */
require_once "Zend/Search/Lucene/Document.php";

/** Zend_Search_Lucene_Document_Html */
require_once "Zend/Search/Lucene/Document/Html.php";

/** Zend_Search_Lucene_Storage_Directory_Filesystem */
require_once "Zend/Search/Lucene/Storage/Directory/Filesystem.php";

/** Zend_Search_Lucene_Storage_File_Memory */
require_once "Zend/Search/Lucene/Storage/File/Memory.php";

/** Zend_Search_Lucene_Index_Term */
require_once "Zend/Search/Lucene/Index/Term.php";

/** Zend_Search_Lucene_Index_TermInfo */
require_once "Zend/Search/Lucene/Index/TermInfo.php";

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once "Zend/Search/Lucene/Index/SegmentInfo.php";

/** Zend_Search_Lucene_Index_FieldInfo */
require_once "Zend/Search/Lucene/Index/FieldInfo.php";

/** Zend_Search_Lucene_Index_Writer */
require_once "Zend/Search/Lucene/Index/Writer.php";

/** Zend_Search_Lucene_Search_QueryParser */
require_once "Zend/Search/Lucene/Search/QueryParser.php";

/** Zend_Search_Lucene_Search_QueryHit */
require_once "Zend/Search/Lucene/Search/QueryHit.php";

/** Zend_Search_Lucene_Search_Similarity */
require_once "Zend/Search/Lucene/Search/Similarity.php";

/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once "Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php";

/**
 * Zend_Search_Lucene_LockManager
 *
 * The implementation is chosen by the $CFG->block_search_softlock setting.
 * empty() is used so that an unset $CFG or property selects the default
 * lock manager without relying on the error suppression operator.
 */
if (!empty($CFG->block_search_softlock)) {
    require_once "Zend/Search/Lucene/SoftLockManager.php";
} else {
    require_once "Zend/Search/Lucene/LockManager.php";
}

/** Zend_Search_Lucene_Interface */
require_once "Zend/Search/Lucene/Interface.php";

/** Zend_Search_Lucene_Proxy */
require_once "Zend/Search/Lucene/Proxy.php";


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
{
    /**
     * Default field name for search
     *
     * Null means search through all fields
     *
     * @var string
     */
    private static $_defaultSearchField = null;

    /**
     * Result set limit
     *
     * 0 means no limit
     *
     * @var integer
     */
    private static $_resultSetLimit = 0;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;

    /**
     * File system adapter closing option
     *
     * True when the directory object was created by this class (see the constructor),
     * false when the caller supplied it.
     *
     * @var boolean
     */
    private $_closeDirOnExit = true;

    /**
     * Writer for this index, not instantiated unless required.
     *
     * @var Zend_Search_Lucene_Index_Writer
     */
    private $_writer = null;

    /**
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index,
     * keyed by segment name.
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Number of documents in this index.
     *
     * @var integer
     */
    private $_docCount = 0;

    /**
     * Flag for index changes
     *
     * @var boolean
     */
    private $_hasChanges = false;

    /**
     * Signal, that index is already closed, changes are fixed and resources are cleaned up
     *
     * @var boolean
     */
    private $_closed = false;

    /**
     * Number of references to the index object
     *
     * @var integer
     */
    private $_refCount = 0;

    /**
     * Current segment generation
     *
     * @var integer
     */
    private $_generation;


    /**
     * Create index
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene_Interface
     */
    public static function create($directory)
    {
        return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, true));
    }

    /**
     * Open index
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene_Interface
     */
    public static function open($directory)
    {
        return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
    }

    /** Generation retrieving counter */
    const GENERATION_RETRIEVE_COUNT = 10;

    /** Pause between generation retrieving attempts in milliseconds */
    const GENERATION_RETRIEVE_PAUSE = 50;

    /**
     * Get current generation number
     *
     * Returns generation number
     * 0 means pre-2.1 index format
     * -1 means there are no segments files.
     *
     * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
     *
     * Apache Lucene index format documentation mentions this method only as a fallback method
     *
     * Nevertheless we use it according to the performance considerations
     *
     * @todo check if we can use some modification of Apache Lucene generation determination algorithm
     *       without performance problems
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
    {
        try {
            for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
                // Try to get generation file
                $genFile = $directory->getFileObject('segments.gen', false);

                $format = $genFile->readInt();
                if ($format != (int)0xFFFFFFFE) {
                    throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
                }

                // The generation is stored twice; differing values are treated as
                // "not settled yet" and the file is read again after a pause
                $gen1 = $genFile->readLong();
                $gen2 = $genFile->readLong();

                if ($gen1 == $gen2) {
                    return $gen1;
                }

                usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
            }

            // All passes are failed
            throw new Zend_Search_Lucene_Exception('Index is under processing now');
        } catch (Zend_Search_Lucene_Exception $e) {
            // Only a "... is not readable" failure falls through to the old style lookup
            if (strpos($e->getMessage(), 'is not readable') === false) {
                throw $e;
            }
        }

        try {
            // Try to open old style segments file
            $directory->getFileObject('segments', false);

            // It's pre-2.1 index
            return 0;
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                throw $e;
            }
        }

        // Neither segments.gen nor segments could be read
        return -1;
    }

    /**
     * Get segments file name
     *
     * Generation 0 maps to the plain 'segments' file; any other generation maps to
     * 'segments_' followed by the generation number written in base 36.
     *
     * @param integer $generation
     * @return string
     */
    public static function getSegmentFileName($generation)
    {
        if ($generation == 0) {
            return 'segments';
        }

        return 'segments_' . base_convert($generation, 10, 36);
    }

    /**
     * Read segments file for pre-2.1 Lucene index format
     *
     * Resets the document counter and registers one segment info object per
     * segment listed in the 'segments' file.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _readPre21SegmentsFile()
    {
        $segmentsFile = $this->_directory->getFileObject('segments');

        $format = $segmentsFile->readInt();

        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // read version (a long, consumed as two ints and discarded)
        // $segmentsFile->readLong();
        $segmentsFile->readInt();
        $segmentsFile->readInt();

        // read segment name counter
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        $this->_docCount = 0;

        // read segmentInfos
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();
            $this->_docCount += $segSize;

            $this->_segmentInfos[$segName] =
                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                         $segName,
                                                                         $segSize);
        }
    }

    /**
     * Read segments file
     *
     * Reads the segments file of the current generation, resets the document
     * counter and registers one segment info object per listed segment.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _readSegmentsFile()
    {
        $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));

        $format = $segmentsFile->readInt();

        if ($format != (int)0xFFFFFFFD) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // read version (a long, consumed as two ints and discarded)
        // $segmentsFile->readLong();
        $segmentsFile->readInt();
        $segmentsFile->readInt();

        // read segment name counter
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        $this->_docCount = 0;

        // read segmentInfos
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            // 2.1+ specific properties
            //$delGen          = $segmentsFile->readLong();
            $delGenHigh        = $segmentsFile->readInt();
            $delGenLow         = $segmentsFile->readInt();
            if ($delGenHigh == (int)0xFFFFFFFF  &&  $delGenLow == (int)0xFFFFFFFF) {
                $delGen = -1; // There are no deletes
            } else {
                // NOTE(review): the 32-bit shift assumes 64-bit PHP integers - confirm for 32-bit builds
                $delGen = ($delGenHigh << 32) | $delGenLow;
            }

            $hasSingleNormFile = $segmentsFile->readByte();
            $numField          = $segmentsFile->readInt();

            if ($numField != (int)0xFFFFFFFF) {
                // Any other value announces per-field norm generations. They are not
                // supported, so fail right away instead of reading values nobody uses.
                throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
            }

            $isCompound        = $segmentsFile->readByte();

            $this->_docCount += $segSize;

            $this->_segmentInfos[$segName] =
                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                         $segName,
                                                                         $segSize,
                                                                         $delGen,
                                                                         $hasSingleNormFile,
                                                                         $isCompound);
        }
    }

    /**
     * Opens the index.
     *
     * IndexReader constructor needs Directory as a parameter. It should be
     * a string with a path to the index folder or a Directory object.
     *
     * A Zend_Search_Lucene_Storage_Directory_Filesystem instance is used as is and is
     * not closed by this object; any other value is handed to a new
     * Zend_Search_Lucene_Storage_Directory_Filesystem which this object closes itself.
     *
     * If opening fails after the read lock has been obtained, the locks taken here are
     * given back before the exception is rethrown.
     *
     * @param mixed   $directory
     * @param boolean $create     when true, new index files are written through
     *                            Zend_Search_Lucene_Index_Writer::createIndex() before the index is read
     * @throws Zend_Search_Exception         when no directory is given
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct($directory = null, $create = false)
    {
        if ($directory === null) {
            // NOTE(review): every other failure in this class is a Zend_Search_Lucene_Exception;
            // kept as is because callers may depend on the type - confirm before changing.
            throw new Zend_Search_Exception('No index directory specified');
        }

        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
            $this->_directory      = $directory;
            $this->_closeDirOnExit = false;
        } else {
            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
            $this->_closeDirOnExit = true;
        }

        $this->_segmentInfos = array();

        // Mark index as "under processing" to prevent other processes from premature index cleaning
        Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);

        try {
            // Escalate read lock to prevent current generation index files to be deleted while opening process is not done
            Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);

            $this->_generation = self::getActualGeneration($this->_directory);

            if ($create) {
                try {
                    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
                } catch (Zend_Search_Lucene_Exception $e) {
                    if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
                        throw $e;
                    } else {
                        throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
                    }
                }

                try {
                    if ($this->_generation == -1) {
                        // Directory doesn't contain existing index, start from 1
                        $this->_generation = 1;
                        $nameCounter = 0;
                    } else {
                        // Directory contains existing index
                        $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
                        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)

                        $nameCounter = $segmentsFile->readInt();
                        $this->_generation++;
                    }

                    Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
                } catch (Exception $createFailure) {
                    // Do not keep the write lock when the index files could not be written
                    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
                    throw $createFailure;
                }

                Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
            }

            if ($this->_generation == -1) {
                throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
            } else if ($this->_generation == 0) {
                $this->_readPre21SegmentsFile();
            } else {
                $this->_readSegmentsFile();
            }
        } catch (Exception $openFailure) {
            // Opening failed: undo the locking done above in the same order the
            // regular life cycle uses (de-escalate, then release).
            try {
                Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
                Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
            } catch (Exception $cleanupFailure) {
                // Deliberately ignored: the original failure is the one to report
            }

            throw $openFailure;
        }

        // Opening is done: de-escalate the read lock that was escalated above
        Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
    }

    /**
     * Close current index and free resources
     */
    private function _close()
    {
        if ($this->_closed) {
            // index is already closed and resources are cleaned up
            return;
        }

        $this->commit();

        // Release "under processing" flag
        Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -