⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indexercore.inc.php

📁 PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。
💻 PHP
📖 第 1 页 / 共 4 页
字号:

		$this->extractorCache	= array();
		$this->debug 			= $config->get('indexer/debug', true);
		$this->hookCache 		= array();
		$this->generalHookCache = array();
		$this->extractorPath 	= $config->get('indexer/extractorPath', 'extractors');
		$this->hookPath 		= $config->get('indexer/extractorHookPath','extractorHooks');

		$this->loadExtractorStatus();
	}

	/**
	 * Refresh the cached list of enabled extractors.
	 *
	 * Selects every row of mime_extractors that has active=1 and stores the
	 * row names in $this->enabledExtractors, replacing any previous list.
	 */
	private function loadExtractorStatus()
	{
		$rows = DBUtil::getResultArray("SELECT id, name FROM mime_extractors WHERE active=1");

		$this->enabledExtractors = array();
		foreach ($rows as $row)
		{
			array_push($this->enabledExtractors, $row['name']);
		}
	}

	/**
	 * Tell whether an extractor is present in the cached enabled list.
	 *
	 * @param string $extractor Extractor name, compared against the names loaded from mime_extractors.
	 * @return boolean
	 */
	private function isExtractorEnabled($extractor)
	{
		$enabled = $this->enabledExtractors;

		return in_array($extractor, $enabled);
	}

	/**
	 * Return the shared indexer instance, creating it on the first call.
	 *
	 * The concrete class name comes from the 'indexer/coreClass' config entry
	 * and its source is loaded from indexing/indexers/<class>.inc.php.
	 *
	 * @return Indexer
	 * @throws Exception when the class is still undefined after the include.
	 */
	public static function get()
	{
		static $singleton = null;

		// Built on an earlier call: hand the same instance back.
		if (!is_null($singleton))
		{
			return $singleton;
		}

		$config = KTConfig::getSingleton();
		$classname = $config->get('indexer/coreClass');

		require_once('indexing/indexers/' . $classname . '.inc.php');

		if (class_exists($classname))
		{
			$singleton = new $classname;
			return $singleton;
		}

		throw new Exception("Class '$classname' does not exist.");
	}

	/**
	 * Abstract hook that every concrete indexer class has to implement.
	 *
	 * NOTE(review): presumably removes the given document from the search
	 * index - confirm against the implementing classes.
	 *
	 * @param mixed $docid Identifier of the document (presumably the numeric document id - confirm).
	 */
	public abstract function deleteDocument($docid);

	/**
	 * Drop every extractor registration from the database.
	 *
	 * First detaches all mime types from their extractor (extractor_id is set
	 * to null), then empties the mime_extractors table. A debug line is
	 * logged when the debug flag is on.
	 */
	public function clearExtractors()
	{
		global $default;

		$statements = array(
			"update mime_types set extractor_id=null",
			"delete from mime_extractors"
		);

		foreach ($statements as $statement)
		{
			DBUtil::runQuery($statement);
		}

		if ($this->debug)
		{
			$default->log->debug('clearExtractors');
		}
	}

	/**
	 * Look up the name of the extractor class registered for a mime type.
	 *
	 * @param string $type Value matched against mime_types.filetypes.
	 * @return string The extractor class name, or the PEAR error produced by the lookup.
	 */
	public static function resolveExtractor($type)
	{
		global $default;

		$sql = "select extractor from mime_types where filetypes='$type'";
		$class = DBUtil::getOneResultKey($sql,'extractor');
		if (PEAR::isError($class))
		{
			$default->log->error("resolveExtractor: cannot resolve $type");
			return $class;
		}

		// This method is static, so $this is not available here (using it is a
		// fatal error). Read the 'indexer/debug' config entry directly; it is
		// the same entry that populates $this->debug on instances.
		$debug = KTConfig::getSingleton()->get('indexer/debug', true);
		if ($debug)
		{
			$default->log->debug(sprintf(_kt("resolveExtractor: Resolved '%s' from mime type '%s'."), $class, $type));
		}

		return $class;
	}

	/**
	 * Concatenate the subject and body of every discussion comment on a document.
	 *
	 * Each comment row contributes "<subject>\n<body>\n" to the result.
	 *
	 * @param int $docid
	 * @return string Empty string when the query yields no rows.
	 */
	public static function getDiscussionText($docid)
	{
		$comments = DBUtil::getResultArray("SELECT
					dc.subject, dc.body
				FROM
					discussion_threads dt
					INNER JOIN discussion_comments dc ON dc.thread_id=dt.id AND dc.id BETWEEN dt.first_comment_id AND dt.last_comment_id
				WHERE
					dt.document_id=$docid");

		$pieces = array();
		foreach ($comments as $comment)
		{
			$pieces[] = $comment['subject'] . "\n" . $comment['body'] . "\n";
		}

		return implode('', $pieces);
	}

	/**
	 * Schedule the indexing of a document.
	 *
	 * Any existing queue entry for the document is removed first, then a new
	 * row is inserted into index_files. If the document cannot be resolved
	 * (PEAR error) the problem is logged and nothing is queued.
	 *
	 * @param mixed $document A document object, or a numeric document id that is resolved through Document::get().
	 * @param string $what Value stored in the 'what' column of the queue row (defaults to 'A').
	 */
    public static function index($document, $what='A')
    {
        global $default;

        if (is_numeric($document))
        {
            $document = Document::get($document+0);
        }

        if (PEAR::isError($document))
        {
            $default->log->error("index: Could not index document: " .$document->getMessage());
            return;
        }

        $document_id = $document->getId();

        // empty() copes with $_SESSION['userID'] not being set without raising
        // an undefined-index notice; user id 1 is the fallback.
        $userid = empty($_SESSION['userID']) ? 1 : $_SESSION['userID'];

        // we dequeue the document so that there are no issues when enqueuing
        Indexer::unqueueDocument($document_id);

        // enqueue item
        $sql = "INSERT INTO index_files(document_id, user_id, what) VALUES($document_id, $userid, '$what')";
        DBUtil::runQuery($sql);

        $default->log->debug("index: Queuing indexing of $document_id");
    }

    /**
     * Add one to the stored count of indexed documents.
     */
    private static function incrementCount()
    {
        $next = (int) Indexer::getIndexedDocumentCount() + 1;
        Indexer::updateIndexedDocumentCount($next);
    }

    /**
     * Read the 'indexedDocumentCount' system setting.
     *
     * @return int The stored value cast to int; 0 is passed to the lookup as the default.
     */
    public static function getIndexedDocumentCount()
    {
        return (int) KTUtil::getSystemSetting('indexedDocumentCount', 0);
    }

    /**
     * Store a new value in the 'indexedDocumentCount' system setting.
     *
     * @param int $cnt Value to store; defaults to 0.
     */
    public static function updateIndexedDocumentCount($cnt = 0)
    {
        $setting = 'indexedDocumentCount';
        KTUtil::setSystemSetting($setting, $cnt);
    }

	/**
	 * Set processdate to null on every row of index_files.
	 *
	 * NOTE(review): presumably this makes the whole queue eligible for
	 * processing again - confirm against the queue consumer.
	 */
	public static function reindexQueue()
	{
		DBUtil::runQuery("UPDATE index_files SET processdate = null");
	}

	/**
	 * Set processdate and status_msg to null on the queue row(s) of one document.
	 *
	 * @param int $documentId Interpolated directly into the WHERE clause.
	 */
	public static function reindexDocument($documentId)
	{
		$reset = "UPDATE index_files SET processdate=null, status_msg=null";
		$reset .= " WHERE document_id=$documentId";

		DBUtil::runQuery($reset);
	}



    /**
     * Rebuild the indexing queue from scratch.
     *
     * Empties index_files and then queues every document that has
     * status_id=1, attributed to the session user (or user 1 when the
     * session carries no user id).
     */
    public static function indexAll()
    {
        // empty() copes with $_SESSION['userID'] not being set without raising
        // an undefined-index notice; user id 1 is the fallback.
        $userid = empty($_SESSION['userID']) ? 1 : $_SESSION['userID'];

        $sql = "DELETE FROM index_files";
        DBUtil::runQuery($sql);

        $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'A' FROM documents WHERE status_id=1 and id not in (select document_id from index_files)";
        DBUtil::runQuery($sql);
    }

    /**
     * Queue every document below a folder for indexing.
     *
     * Documents with status_id=1 whose full_path starts with the folder's
     * path followed by '/' are added to index_files unless they are already
     * queued.
     *
     * @param Folder|FolderProxy $folder
     * @throws Exception if $folder is neither a Folder nor a FolderProxy.
     */
    public static function indexFolder($folder)
    {
        if (!$folder instanceof Folder && !$folder instanceof FolderProxy)
        {
            throw new Exception('Folder expected');
        }

        // empty() copes with $_SESSION['userID'] not being set without raising
        // an undefined-index notice; user id 1 is the fallback.
        $userid = empty($_SESSION['userID']) ? 1 : $_SESSION['userID'];

        // The path ends up inside a quoted SQL literal, so a quote in a folder
        // name would break the statement. Escape it with the helper this class
        // already uses for status messages.
        // NOTE(review): '%' and '_' in the path still act as LIKE wildcards.
        $full_path = sanitizeForSQL($folder->getFullPath());

        $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'A' FROM documents WHERE full_path like '{$full_path}/%' AND status_id=1 and id not in (select document_id from index_files)";
        DBUtil::runQuery($sql);
    }

    /**
     * Purge queue entries whose document is gone.
     *
     * Deletes rows from index_files when the document has status_id=3 or no
     * longer has a row in the documents table, then logs a debug line.
     */
    public static function clearoutDeleted()
    {
        global $default;

        DBUtil::runQuery('DELETE FROM
					index_files
				WHERE
					document_id in (SELECT d.id FROM documents AS d WHERE d.status_id=3) OR
					NOT EXISTS(SELECT index_files.document_id FROM documents WHERE index_files.document_id=documents.id)');

        $logLine = "Indexer::clearoutDeleted: removed documents from indexing queue that have been deleted";
        $default->log->debug($logLine);
    }


    /**
     * Report whether the indexing queue holds an entry for a document.
     *
     * @param mixed $document Either a Document object or a numeric document id.
     * @return boolean False for any other kind of argument, otherwise whether index_files has a matching row.
     */
    public static function isDocumentScheduled($document)
    {
        $byId = is_numeric($document);

        // Neither an id nor a Document: reported as not scheduled.
        if (!$byId && !($document instanceof Document))
        {
            return false;
        }

        $docid = $byId ? $document : $document->getId();
        $rows = DBUtil::getResultArray("SELECT 1 FROM index_files WHERE document_id=$docid");

        return count($rows) > 0;
    }

    /**
     * Collapse whitespace in a text file, rewriting the file in place.
     *
     * Carriage returns, newlines and tabs are turned into spaces and runs of
     * spaces are reduced to a single space; the replacements are repeated
     * until the content stops changing.
     *
     * @param string $filename Path of the file to rewrite.
     * @return boolean True if the filtered content was written back; false if the file could not be read, filtered or written.
     */
    private function filterText($filename)
    {
        $content = file_get_contents($filename);
        if ($content === false)
        {
            // Nothing was read: do not overwrite the file with an empty string.
            return false;
        }

        $src = array("([\r\n])","([\n][\n])","([\n])","([\t])",'([ ][ ])');
        $tgt = array("\n","\n",' ',' ',' ');

        // shrink what is being stored.
        do
        {
            $orig = $content;
            $content = preg_replace($src, $tgt, $content);
            if (is_null($content))
            {
                // preg_replace() reports failure with null; leave the file as it was.
                return false;
            }
            // Strict comparison on purpose: with != two numeric-looking strings
            // that differ only in whitespace (e.g. "   5" and "  5") compare
            // equal, which would end the loop before the text is fully collapsed.
        } while ($content !== $orig);

        return file_put_contents($filename, $content) !== false;
    }

    /**
     * Load hooks for text extraction process.
     *
     * Every file in the hook directory whose name ends in 'Hook.inc.php' is
     * included. The class named after the file (file name minus '.inc.php')
     * is instantiated and, if it is an ExtractorHook, cached either as a
     * general hook (registerMimeTypes() returned null) or under each mime
     * type it registers for.
     */
    private function loadExtractorHooks()
    {
        $this->generalHookCache = array();
        $this->mimeHookCache = array();

        $dir = opendir(SearchHelper::correctPath($this->hookPath));
        if ($dir === false)
        {
            // Hook directory missing or unreadable: leave both caches empty.
            return;
        }

        while (($file = readdir($dir)) !== false)
        {
            if (substr($file,-12) != 'Hook.inc.php')
            {
                continue;
            }

            require_once($this->hookPath . '/' . $file);
            $class = substr($file, 0, -8);

            if (!class_exists($class))
            {
                continue;
            }

            // Test the instance, not the class-name string: an instanceof
            // check on a string is always false, which skipped every hook.
            $hook = new $class;
            if (!($hook instanceof ExtractorHook))
            {
                continue;
            }

            // Store the object handle by value. Assigning "& $hook" would bind
            // each cache slot to the loop variable, so every slot would end up
            // holding the last hook created.
            $mimeTypes = $hook->registerMimeTypes();
            if (is_null($mimeTypes))
            {
                $this->generalHookCache[] = $hook;
            }
            else
            {
                foreach($mimeTypes as $type)
                {
                    $this->mimeHookCache[$type][] = $hook;
                }
            }
        }
        closedir($dir);
    }

    /**
     * Invoke one phase method on every applicable hook.
     *
     * @param DocumentExtractor $extractor Passed as the only argument to each hook call.
     * @param string $phase Name of the hook method to call.
     * @param string $mimeType Optional. When given, only the hooks cached for that mime type run; otherwise the general hooks run.
     */
    private function executeHook($extractor, $phase, $mimeType = null)
    {
        if (is_null($mimeType))
        {
            $selected = $this->generalHookCache;
        }
        elseif (array_key_exists($mimeType, $this->mimeHookCache))
        {
            $selected = $this->mimeHookCache[$mimeType];
        }
        else
        {
            $selected = array();
        }

        if (!empty($selected))
        {
            foreach ($selected as $handler)
            {
                $handler->$phase($extractor);
            }
        }
    }

    /**
     * Run the indexer self-checks, logging problems only once per outage.
     *
     * A lock file (index.log.lock in the cache directory) marks that problems
     * have already been logged: errors go to the log only while the file is
     * absent, and the file is touched on every failure. When all checks pass
     * and the file exists, the recovery is logged and the file is removed.
     *
     * @param boolean $simple When true, only diagnose() is consulted; the extractor checks and the lock-file cleanup are skipped.
     * @return boolean True when the checks that were run reported no problem.
     */
    private function doesDiagnosticsPass($simple=false)
    {
        global $default;

        $config =& KTConfig::getSingleton();
        $lockFile = $config->get('cache/cacheDirectory') . '/index.log.lock';

        // Stage 1: the indexer itself.
        $coreProblem = $this->diagnose();
        if (!is_null($coreProblem))
        {
            $alreadyLogged = is_file($lockFile);
            if (!$alreadyLogged)
            {
                $default->log->error(_kt('Indexer problem: ') . $coreProblem);
            }
            touch($lockFile);
            return false;
        }

        if ($simple)
        {
            return true;
        }

        // Stage 2: the individual extractors.
        $extractorProblems = $this->diagnoseExtractors();
        if (!empty($extractorProblems))
        {
            if (!is_file($lockFile))
            {
                foreach ($extractorProblems as $problem)
                {
                    $default->log->error(sprintf(_kt('%s problem: %s'), $problem['name'], $problem['diagnosis']));
                }
            }
            touch($lockFile);
            return false;
        }

        // Everything is healthy: clear the marker left by an earlier failure.
        if (is_file($lockFile))
        {
            $default->log->info(_kt('Issues with the indexer have been resolved!'));
            unlink($lockFile);
        }

        return true;
    }

    /**
     * This does the initial mime type association between mime types and text extractors
     *
     * Runs only once: the 'mimeTypesRegistered' system setting is checked at
     * the start and set to true at the end. In between it sets run_time of
     * all scheduler_tasks rows to the current time, calls registerTypes(true),
     * deactivates the extractors listed for the current OS and reloads the
     * enabled-extractor list.
     */
    public function checkForRegisteredTypes()
    {
        global $default;

        // we are only doing this once!
        $initRegistered = KTUtil::getSystemSetting('mimeTypesRegistered', false);
        if ($initRegistered)
        {
            return;
        }
        if ($this->debug) $default->log->debug('checkForRegisteredTypes: start');

        $date = date('Y-m-d H:i');
        $sql = "UPDATE scheduler_tasks SET run_time='$date'";
        DBUtil::runQuery($sql);

        $this->registerTypes(true);

        $disable = array(
            'windows'=>array('PSExtractor'),
            'unix' => array()
        );

        $disableForOS = OS_WINDOWS?$disable['windows']:$disable['unix'];

        if (!empty($disableForOS))
        {
            // Quoted, comma-separated list, e.g. 'PSExtractor'
            $disabledList = '\'' . implode("','", $disableForOS) .'\'';

            $sql = "UPDATE mime_extractors SET active=0 WHERE name in ($disabledList)";
            DBUtil::runQuery($sql);

            // Log the names that were actually disabled; the previous message
            // interpolated a variable that was never defined.
            $default->log->info("checkForRegisteredTypes: disabled $disabledList");
        }
        $this->loadExtractorStatus();

        if ($this->debug) $default->log->debug('checkForRegisteredTypes: done');
        KTUtil::setSystemSetting('mimeTypesRegistered', true);
    }

    /**
     * Append a line to the indexing history and save the whole history as the
     * status message of the document's queue row.
     *
     * @param int $documentId Interpolated directly into the WHERE clause.
     * @param string $message Text of the new history line.
     * @param string $level Prefix placed before the message, separated by ': '.
     */
    private function updatePendingDocumentStatus($documentId, $message, $level)
    {
        $this->indexingHistory = $this->indexingHistory . "\n" . $level . ': ' . $message;

        $escaped = sanitizeForSQL($this->indexingHistory);
        DBUtil::runQuery("UPDATE index_files SET status_msg='$escaped' WHERE document_id=$documentId");
    }

    // NOTE(review): starts out false; presumably signals that the current
    // indexing batch should be restarted - confirm against its usages.
    private $restartCurrentBatch = false;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -