lucene.php

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 1,038 行 · 第 1/3 页

PHP
1,038
字号
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Document */
require_once 'Zend/Search/Lucene/Document.php';

/** Zend_Search_Lucene_Document_Html */
require_once 'Zend/Search/Lucene/Document/Html.php';

/** Zend_Search_Lucene_Storage_Directory */
require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';

/** Zend_Search_Lucene_Storage_File_Memory */
require_once 'Zend/Search/Lucene/Storage/File/Memory.php';

/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';

/** Zend_Search_Lucene_Index_TermInfo */
require_once 'Zend/Search/Lucene/Index/TermInfo.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

/** Zend_Search_Lucene_Index_FieldInfo */
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';

/** Zend_Search_Lucene_Index_Writer */
require_once 'Zend/Search/Lucene/Index/Writer.php';

/** Zend_Search_Lucene_Search_QueryParser */
require_once 'Zend/Search/Lucene/Search/QueryParser.php';

/** Zend_Search_Lucene_Search_QueryHit */
require_once 'Zend/Search/Lucene/Search/QueryHit.php';

/** Zend_Search_Lucene_Search_Similarity */
require_once 'Zend/Search/Lucene/Search/Similarity.php';

/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene
{
    /**
     * Default field name for search
     *
     * Null means search through all fields
     *
     * @var string
     */
    static private $_defaultSearchField = null;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;

    /**
     * File system adapter closing option
     *
     * @var boolean
     */
    private $_closeDirOnExit = true;

    /**
     * Writer for this index, not instantiated unless required.
     *
     * @var Zend_Search_Lucene_Index_Writer
     */
    private $_writer = null;

    /**
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Number of documents in this index.
     *
     * @var integer
     */
    private $_docCount = 0;

    /**
     * Flag for index changes
     *
     * @var boolean
     */
    private $_hasChanges = false;


    /**
     * Index lock object
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_lock;


    /**
     * Create index
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene
     */
    static public function create($directory)
    {
        return new Zend_Search_Lucene($directory, true);
    }

    /**
     * Open index
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene
     */
    static public function open($directory)
    {
        return new Zend_Search_Lucene($directory);
    }

    /**
     * Opens the index.
     *
     * IndexReader constructor needs Directory as a parameter. It should be
     * a string with a path to the index folder or a Directory object.
     *
     * @param mixed $directory
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct($directory = null, $create = false)
    {
        if ($directory === null) {
            throw new Zend_Search_Exception('No index directory specified');
        }

        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
            $this->_directory      = $directory;
            $this->_closeDirOnExit = false;
        } else {
            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
            $this->_closeDirOnExit = true;
        }


        // Get a shared lock to the index
        $this->_lock = $this->_directory->createFile('index.lock');

        $this->_segmentInfos = array();

        if ($create) {
            // Throw an exception if index is under processing now
            if (!$this->_lock->lock(LOCK_EX, true)) {
                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
            }

            // Writer will create segments file for empty segments list
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);

            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
            }
        } else {
            // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
            }
            $this->_writer = null;
        }


        $segmentsFile = $this->_directory->getFileObject('segments');

        $format = $segmentsFile->readInt();

        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // read version
        // $segmentsFile->readLong();
        $segmentsFile->readInt(); $segmentsFile->readInt();

        // read segment name counter
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        $this->_docCount = 0;

        // read segmentInfos
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();
            $this->_docCount += $segSize;

            $this->_segmentInfos[] =
                                new Zend_Search_Lucene_Index_SegmentInfo($segName,
                                                                         $segSize,
                                                                         $this->_directory);
        }
    }


    /**
     * Object destructor
     */
    public function __destruct()
    {
        $this->commit();

        // Free shared lock
        $this->_lock->unlock();

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }
    }

    /**
     * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
     *
     * @return Zend_Search_Lucene_Index_Writer
     */
    public function getIndexWriter()
    {
        if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos);
        }

        return $this->_writer;
    }


    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     */
    public function getDirectory()
    {
        return $this->_directory;
    }


    /**
     * Returns the total number of documents in this index (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns one greater than the largest possible document number.
     * This may be used to, e.g., determine how big to allocate a structure which will have
     * an element for every document number in an index.
     *
     * @return integer
     */
    public function maxDoc()
    {
        return $this->count();
    }

    /**
     * Returns the total number of non-deleted documents in this index.
     *
     * @return integer
     */
    public function numDocs()
    {
        $numDocs = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $numDocs += $segmentInfo->numDocs();
        }

        return $numDocs;
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer
     * @return boolean
     * @throws Zend_Search_Lucene_Exception
     */
    public function isDeleted($id)
    {
        if ($id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentStartId + $segmentInfo->count() > $id) {
                break;
            }

            $segmentStartId += $segmentInfo->count();
        }

        return $segmentInfo->isDeleted($id - $segmentStartId);
    }

    /**
     * Set default search field.
     *
     * Null means, that search is performed through all fields by default
     *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?