⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 phpluceneindexer.inc.php

📁 PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。
💻 PHP
字号:
<?php

/**
 * $Id:$
 *
 * KnowledgeTree Community Edition
 * Document Management Made Simple
 * Copyright (C) 2008 KnowledgeTree Inc.
 * Portions copyright The Jam Warehouse Software (Pty) Limited
 * 
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 3 as published by the
 * Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, 
 * California 94120-7775, or email info@knowledgetree.com.
 * 
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU General Public License version 3.
 * 
 * In accordance with Section 7(b) of the GNU General Public License version 3,
 * these Appropriate Legal Notices must retain the display of the "Powered by
 * KnowledgeTree" logo and retain the original copyright notice. If the display of the 
 * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
 * must display the words "Powered by KnowledgeTree" and retain the original 
 * copyright notice.
 * Contributor( s): ______________________________________
 *
 */

/**
 * TODO: must fix text highlighting!
 * TODO: must check that expunge does not bomb out...
 */

require_once 'Zend/Search/Lucene.php';

class PHPLuceneIndexer extends Indexer
{
	/**
	 * Handle on the Zend Lucene index. Left as null when the index could not
	 * be opened and the constructor was asked to swallow the exception.
	 *
	 * @var Zend_Search_Lucene
	 */
	private $lucene;

	/**
	 * The constructor for PHP Lucene. Opens the existing index located at the
	 * configured 'indexer/luceneDirectory' path.
	 *
	 * @param boolean $catchException Optional. If true, a failure to open the
	 *        index is swallowed and the indexer is left uninitialised (which
	 *        diagnose() reports). If false, the exception is rethrown.
	 * @throws Exception when the index cannot be opened and $catchException is false.
	 */
	public function __construct($catchException=false)
	{
		parent::__construct();
		$config =& KTConfig::getSingleton();
		$indexPath = $config->get('indexer/luceneDirectory');
		try
		{
			// second argument false: open the index rather than create it
			$this->lucene = new Zend_Search_Lucene($indexPath, false);
		}
		catch(Exception $ex)
		{
			$this->lucene = null;
			if (!$catchException)
			{
				throw $ex;
			}
		}
	}

	/**
	 * Creates an index to be used.
	 *
	 * Instantiates Zend_Search_Lucene on the configured
	 * 'indexer/luceneDirectory' path with the create flag set.
	 */
	public static function createIndex()
	{
		$config =& KTConfig::getSingleton();
		$indexPath = $config->get('indexer/luceneDirectory');
		new Zend_Search_Lucene($indexPath, true);
	}


	/**
	 * A refactored method to add the document to the index.
	 *
	 * @param int $docid
	 * @param string $content
	 * @param string $discussion
	 * @param string $title
	 * @param string $version
	 */
	private function addDocument($docid, $content, $discussion, $title, $version)
	{
		$doc = new Zend_Search_Lucene_Document();
		$doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid)));
		$doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8'));
		$doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8'));
		$doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8'));
		$doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8'));
		$this->lucene->addDocument($doc);
	}

	/**
	 * Indexes a document based on a text file. The discussion text already
	 * stored in the index for this document is carried over unchanged.
	 *
	 * @param int $docid
	 * @param string $textfile
	 * @param string $title
	 * @param string $version
	 * @return boolean false when the text file is missing or unreadable, true otherwise
	 */
	protected function indexDocument($docid, $textfile, $title, $version)
	{
		global $default;

		if (!is_file($textfile))
		{
			$default->log->error(sprintf(_kt("Attempting to index %d %s but it is not available."),$docid, $textfile));
			return false;
		}

		// Read the new content BEFORE touching the index, so that an
		// unreadable file does not cost us the existing index entry.
		$content = file_get_contents($textfile);
		if ($content === false)
		{
			$default->log->error(sprintf(_kt("Attempting to index %d %s but it is not available."),$docid, $textfile));
			return false;
		}

		// deleteDocument() returns (content, discussion, title, version) of
		// the removed entry; only the discussion is reused here.
		$previous = $this->deleteDocument($docid);
		$discussion = $previous[1];

		$this->addDocument($docid, $content, $discussion, $title, $version);

		return true;
	}

	/**
	 * Indexes the content and discussions on a document.
	 *
	 * @param int $docid
	 * @param string $textfile
	 * @param string $title
	 * @param string $version
	 * @return boolean false when the text file is missing or unreadable, true otherwise
	 */
	protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version)
	{
		global $default;

		if (!is_file($textfile))
		{
			$default->log->error(sprintf(_kt("Attempting to index %d %s but it is not available."),$docid, $textfile));
			return false;
		}

		// Read the new content BEFORE touching the index, so that an
		// unreadable file does not cost us the existing index entry.
		$content = file_get_contents($textfile);
		if ($content === false)
		{
			$default->log->error(sprintf(_kt("Attempting to index %d %s but it is not available."),$docid, $textfile));
			return false;
		}

		$this->deleteDocument($docid);

		$this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title, $version);

		return true;
	}

	/**
	 * Indexes a discussion on a document. Content, title and version already
	 * stored in the index for this document are carried over unchanged.
	 *
	 * @param int $docid
	 * @return boolean
	 */
	protected function indexDiscussion($docid)
	{
		// $discussion from the old entry is deliberately ignored; it is replaced below.
		list($content, $discussion, $title, $version) = $this->deleteDocument($docid);

		$this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title, $version);

		return true;
	}

	/**
	 * Optimise the lucene index.
	 * This can be called periodically to optimise performance and size of the lucene index.
	 *
	 */
	public function optimise()
	{
		parent::optimise();
		$this->lucene->optimize();
	}

	/**
	 * Returns the number of non-deleted documents in the index.
	 *
	 * @return int
	 */
	public function getDocumentsInIndex()
	{
		return $this->lucene->numDocs();
	}

	/**
	 * Removes a document from the index.
	 *
	 * @param int $docid
	 * @return array containing (content, discussion, title, version) of the
	 *         removed entry; every element is an empty string when the
	 *         document was not in the index.
	 */
	public function deleteDocument($docid)
	{
		// Default every field so callers that destructure the result get
		// well-defined values even when nothing matched.
		$content = '';
		$discussion = '';
		$title = '';
		$version = '';

		$query = Zend_Search_Lucene_Search_QueryParser::parse('DocumentID:' . PHPLuceneIndexer::longToString($docid));
		$hits  = $this->lucene->find($query);
		// there should only be one, but we'll loop for safety
		foreach ($hits as $hit)
		{
			$content = $hit->Content;
			$discussion = $hit->Discussion;
			$title = $hit->Title;
			$version = $hit->Version;

			$this->lucene->delete($hit);
		}
		return array($content, $discussion, $title, $version);
	}

	/**
	 * Runs a query against the lucene index.
	 *
	 * The returned text of each result is built from the Content and/or
	 * Discussion fields, depending on whether the words 'content' /
	 * 'discussion' occur (case-insensitively) in the query string, and is
	 * passed through the parsed query's highlightMatches().
	 *
	 * @param string $query
	 * @return array of QueryResultItem keyed by document id; only items whose
	 *         CanBeReadByUser property is truthy are included.
	 */
	public function query($query)
	{
		$results = array();
		// must be evaluated on the raw string, before $query is replaced by the parsed object
		$queryDiscussion = stripos($query,'discussion') !== false;
		$queryContent = stripos($query,'content') !== false;
		$query = Zend_Search_Lucene_Search_QueryParser::parse($query);

		$hits  = $this->lucene->find($query);
		foreach ($hits as $hit)
		{
			$document = $hit->getDocument();

			$document_id = PHPLuceneIndexer::stringToLong($document->DocumentID);

			$coreText = '';
			if ($queryContent)
			{
				$coreText .= $document->Content;
			}
			if ($queryDiscussion)
			{
				$coreText .= $document->Discussion;
			}

			$content = $query->highlightMatches($coreText);

			$title = $document->Title;
			$score = $hit->score;

			// avoid adding duplicates. If it is in already, it has higher priority.
			if (!array_key_exists($document_id, $results) || $score > $results[$document_id]->Score)
			{
				$item = new QueryResultItem($document_id,  $score, $title,  $content);
				if ($item->CanBeReadByUser)
				{
					$results[$document_id] = $item;
				}
			}
		}
		return $results;
	}

	/**
	 * Diagnose the indexer. e.g. Check that the indexing server is running.
	 *
	 * @return string|null a translated error message when the lucene index
	 *         was not opened by the constructor, null otherwise.
	 */
	public function diagnose()
	{
		if ($this->lucene === null)
		{
			$indexer = $this->getDisplayName();
			return sprintf(_kt("The %s has not been initialised correctly. Please review the documentation on how to setup the indexing."),$indexer);
		}
		return null;
	}

	/**
	 * Returns the name of the indexer.
	 *
	 * @return string
	 */
	public function getDisplayName()
	{
		return _kt('Document Indexer Library');
	}
}
?>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -