⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indexercore.inc.php

📁 PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。
💻 PHP
📖 第 1 页 / 共 4 页
字号:
								$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error');
								$this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
							}

        					$extractor->setIndexingStatus($indexStatus);
						}
        			}

					$this->executeHook($extractor, 'post_index', $mimeType);
        			$this->executeHook($extractor, 'post_index');
        		}
        		else
        		{
        			$extractor->setExtractionStatus(false);
        			$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error');
					$this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
        		}

				$this->executeHook($extractor, 'post_extract', $mimeType);
        		$this->executeHook($extractor, 'post_extract');

        		if ($extractor->needsIntermediateSourceFile())
        		{
        			@unlink($sourceFile);
        		}

        		@unlink($targetFile);

        	}
        	else
        	{
				$indexStatus = $this->indexDiscussion($docId);
				$removeFromQueue = $indexStatus;
        	}

        	if ($removeFromQueue)
        	{
        		Indexer::unqueueDocument($docId, sprintf(_kt("Done indexing docid: %d"),$docId));
        	}
        	else
        	{
        		if ($this->debug) $default->log->debug(sprintf(_kt("Document docid: %d was not removed from the queue as it looks like there was a problem with the extraction process"),$docId));
        	}
        }
        if ($this->debug) $default->log->debug('indexDocuments: done');
        //unlink($indexLockFile);
    }

    /**
     * Migrate legacy document text from the document_text table into the index.
     *
     * Runs up to five batches. Each row is written to a temporary file and
     * handed to indexDocumentAndDiscussion(). Rows that index successfully, and
     * rows whose document can no longer be loaded, are deleted from
     * document_text. When a batch query returns no rows, the migration is
     * flagged as complete and the 'Index Migration' scheduler entry is removed.
     *
     * A lock file in the cache directory prevents overlapping runs.
     *
     * @param int $max Optional. Maximum number of documents to process in this
     *                 run. Defaults to the 'indexer/batchMigrateDocument'
     *                 config value (500 when not configured).
     */
    public function migrateDocuments($max=null)
    {
        global $default;

        $default->log->info(_kt('migrateDocuments: starting'));

        if (!$this->doesDiagnosticsPass(true))
        {
            $default->log->info(_kt('migrateDocuments: stopping - diagnostics problem. The dashboard will provide more information.'));
            return;
        }

        if (KTUtil::getSystemSetting('migrationComplete') == 'true')
        {
            $default->log->info(_kt('migrateDocuments: stopping - migration is complete.'));
            return;
        }

        $config =& KTConfig::getSingleton();
        if (is_null($max))
        {
            $max = $config->get('indexer/batchMigrateDocument',500);
        }

        // Directory for the intermediate text files. Previously $tempPath was
        // never assigned, so tempnam() silently fell back to the system default.
        $tempPath = $config->get('urls/tmpDirectory');

        $lockFile = $config->get('cache/cacheDirectory') . '/migration.lock';
        if (is_file($lockFile))
        {
            $default->log->info(_kt('migrateDocuments: stopping - migration lockfile detected.'));
            return;
        }
        touch($lockFile);

        // Record the start time only on the very first run.
        $startTime = KTUtil::getSystemSetting('migrationStarted');
        if (is_null($startTime))
        {
            KTUtil::setSystemSetting('migrationStarted', time());
        }

        $maxLoops = 5;

        // Per-batch size. It must never reach 0: "LIMIT 0" returns no rows, which
        // the loop below would mistake for "nothing left to migrate".
        $max = max(1, (int) ceil($max / $maxLoops));

        $start = KTUtil::getBenchmarkTime();
        $noDocs = false;
        $numDocs = 0;

        for ($loop = 0; $loop < $maxLoops; $loop++)
        {
            // NOTE(review): rows that fail to index stay in document_text, so the
            // next batch selects them again (the query has no offset).
            $sql = "SELECT
        			document_id, document_text
				FROM
					document_text
				ORDER BY document_id
 					LIMIT $max";
            $result = DBUtil::getResultArray($sql);
            if (PEAR::isError($result))
            {
                $default->log->info(_kt('migrateDocuments: db error'));
                break;
            }

            $docs = count($result);
            if ($docs == 0)
            {
                // The table is empty: the migration has finished.
                $noDocs = true;
                break;
            }
            $numDocs += $docs;

            foreach ($result as $docinfo)
            {
                $docId = (int) $docinfo['document_id'];

                $document = Document::get($docId);
                if (PEAR::isError($document) || is_null($document))
                {
                    // Orphaned text: the owning document is gone, so drop the row.
                    $sql = "DELETE FROM document_text WHERE document_id=$docId";
                    DBUtil::runQuery($sql);
                    $default->log->error(sprintf(_kt('migrateDocuments: Could not get document %d\'s document! Removing content!'),$docId));
                    continue;
                }

                $version = $document->getMajorVersionNumber() . '.' . $document->getMinorVersionNumber();

                $targetFile = tempnam($tempPath, 'ktindexer');

                if ($targetFile === false || file_put_contents($targetFile, $docinfo['document_text']) === false)
                {
                    $default->log->error(sprintf(_kt('migrateDocuments: Cannot write to \'%s\' for document id %d'), $targetFile, $docId));
                    if ($targetFile !== false)
                    {
                        // tempnam() already created the file; do not leave it behind.
                        @unlink($targetFile);
                    }
                    continue;
                }
                // free memory asap ;)
                unset($docinfo['document_text']);

                $title = $document->getName();

                $indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title, $version);

                if ($indexStatus)
                {
                    // Indexed successfully: the legacy row is no longer needed.
                    $sql = "DELETE FROM document_text WHERE document_id=$docId";
                    DBUtil::runQuery($sql);
                }
                else
                {
                    $default->log->error(sprintf(_kt("migrateDocuments: Problem indexing document %d"), $docId));
                }

                @unlink($targetFile);
            }
        }

        @unlink($lockFile);

        $time = KTUtil::getBenchmarkTime() - $start;

        // Accumulate totals across runs.
        KTUtil::setSystemSetting('migrationTime', KTUtil::getSystemSetting('migrationTime',0) + $time);
        KTUtil::setSystemSetting('migratedDocuments', KTUtil::getSystemSetting('migratedDocuments',0) + $numDocs);

        $default->log->info(sprintf(_kt('migrateDocuments: stopping - done in %d seconds!'), $time));
        if ($noDocs)
        {
            $default->log->info(_kt('migrateDocuments: Completed!'));
            KTUtil::setSystemSetting('migrationComplete', 'true');
            schedulerUtil::deleteByName('Index Migration');
            $default->log->debug(_kt('migrateDocuments: Disabling \'Index Migration\' task by removing scheduler entry.'));
        }
    }

    /**
     * Index a document. Concrete subclasses must implement this function.
     *
     * Callers in this class pass the path of a file holding the extracted text
     * and use the return value as the indexing status.
     *
     * @param int $docId
     * @param string $textFile Path of the file containing the text to index.
     * @param string $title
     * @param string $version
     */
    protected abstract function indexDocument($docId, $textFile, $title, $version);


    /**
     * Re-index a single document using text supplied by the caller.
     *
     * The text is written to a temporary file in the configured
     * 'urls/tmpDirectory', passed to indexDocument(), and the file is removed
     * afterwards.
     *
     * @param int $docId
     * @param string $text Text to index for the document.
     * @return mixed The result of indexDocument(), or false when the document
     *               cannot be loaded or the temporary file cannot be written.
     */
    public function updateDocumentIndex($docId, $text)
    {
        global $default;

        // Resolve the document first so a bad id fails before any file is created.
        $document = Document::get($docId);
        if (PEAR::isError($document) || is_null($document))
        {
            $default->log->error(sprintf(_kt("updateDocumentIndex: Could not get document %d"), $docId));
            return false;
        }

        $config = KTConfig::getSingleton();
        $tempPath = $config->get("urls/tmpDirectory");
        $tempFile = tempnam($tempPath,'ud_');

        if ($tempFile === false || file_put_contents($tempFile, $text) === false)
        {
            $default->log->error(sprintf(_kt("updateDocumentIndex: Cannot write to '%s' for document id %d"), $tempFile, $docId));
            if ($tempFile !== false && file_exists($tempFile))
            {
                unlink($tempFile);
            }
            return false;
        }

        $title = $document->getDescription();
        $version = $document->getVersion();

        $result = $this->indexDocument($docId, $tempFile, $title, $version);

        // The indexer may already have consumed or removed the file.
        if (file_exists($tempFile))
        {
            unlink($tempFile);
        }

        return $result;
    }

    /**
     * Index a discussion. Concrete subclasses must implement this function.
     *
     * The indexing loop in this class treats the return value as a success
     * flag when deciding whether to remove the document from the queue.
     *
     * @param int $docId
     */
    protected abstract function indexDiscussion($docId);

    /**
     * Diagnose the indexer, e.g. check that the indexing server is running.
     * Concrete subclasses must implement this function.
     *
     */
	public abstract function diagnose();

    /**
     * Collect diagnostics for every extractor and every hook.
     *
     * Extractors are scanned first, then hooks; the two result sets are merged
     * into a single array.
     *
     * @return array
     */
    public function diagnoseExtractors()
    {
        $extractorResults = $this->_diagnose($this->extractorPath, 'DocumentExtractor', 'Extractor.inc.php');
        $hookResults = $this->_diagnose($this->hookPath, 'Hook', 'Hook.inc.php');

        return array_merge($extractorResults, $hookResults);
    }

    /**
     * Scan a directory of extractor/hook class files and collect diagnostics.
     *
     * Each file ending in $extension is loaded; the class named after the file
     * (file name minus '.inc.php') is instantiated and its diagnose() result is
     * recorded when it is non-empty. Files or classes that do not qualify are
     * logged and skipped.
     *
     * @param string $path Directory to scan.
     * @param string $baseclass Class the instantiated object must be an instance of.
     * @param string $extension Required file name suffix, e.g. 'Extractor.inc.php'.
     * @return array Map of class name => array('name' => display name, 'diagnosis' => diagnosis).
     */
    private function _diagnose($path, $baseclass, $extension)
    {
        global $default;

        $diagnoses = array();

        $dir = opendir(SearchHelper::correctPath($path));
        if ($dir === false)
        {
            // Without a valid handle readdir()/closedir() below cannot work.
            $default->log->error(sprintf(_kt("diagnose: could not open directory '%s'."), $path));
            return $diagnoses;
        }

        $extlen = - strlen($extension);

        while (($file = readdir($dir)) !== false)
        {
            // Skip '.', '..' and hidden files.
            if (substr($file,0,1) == '.')
            {
                continue;
            }
            if (substr($file,$extlen) != $extension)
            {
                $default->log->error(sprintf(_kt("diagnose: '%s' does not have extension '%s'."), $file, $extension));
                continue;
            }

            require_once($path . '/' . $file);

            // Class name is the file name without the 8-character '.inc.php' suffix.
            $class = substr($file, 0, -8);
            if (!class_exists($class))
            {
                $default->log->error(sprintf(_kt("diagnose: class '%s' does not exist."), $class));
                continue;
            }

            if (!$this->isExtractorEnabled($class))
            {
                $default->log->debug(sprintf(_kt("diagnose: extractor '%s' is disabled."), $class));
                continue;
            }

            $extractor = new $class();
            if (!is_a($extractor, $baseclass))
            {
                // Report the base class actually required; this method is used for hooks too.
                $default->log->error(sprintf(_kt("diagnose(): '%s' is not of type %s"), $class, $baseclass));
                continue;
            }

            $types = $extractor->getSupportedMimeTypes();
            if (empty($types))
            {
                if ($this->debug) $default->log->debug(sprintf(_kt("diagnose: class '%s' does not support any types."), $class));
                continue;
            }

            // An empty diagnosis means there is nothing to report for this class.
            $diagnosis = $extractor->diagnose();
            if (empty($diagnosis))
            {
                continue;
            }
            $diagnoses[$class] = array(
                'name'=>$extractor->getDisplayName(),
                'diagnosis'=>$diagnosis
            );
        }
        closedir($dir);

        return $diagnoses;
    }


    /**
     * Register the extractor types.
     *
     * Loads every '*Extractor.inc.php' file in the extractor directory,
     * instantiates the class named after the file (file name minus '.inc.php')
     * and, when it is a DocumentExtractor, asks it to register its mime types.
     *
     * @param boolean $clear Optional. Defaults to false. When true, the
     *                       existing extractors are cleared first.
     */
    public function registerTypes($clear=false)
    {
        global $default;

        if ($clear)
        {
            $this->clearExtractors();
        }

        $dir = opendir(SearchHelper::correctPath($this->extractorPath));
        if ($dir === false)
        {
            // Without a valid handle readdir()/closedir() below cannot work.
            $default->log->error(sprintf(_kt("registerTypes: could not open directory '%s'."), $this->extractorPath));
            return;
        }

        while (($file = readdir($dir)) !== false)
        {
            if (substr($file,-17) != 'Extractor.inc.php')
            {
                continue;
            }

            require_once($this->extractorPath . '/' . $file);

            // Class name is the file name without the 8-character '.inc.php' suffix.
            $class = substr($file, 0, -8);

            if (!class_exists($class))
            {
                // if the class does not exist, we can't do anything.
                continue;
            }

            $extractor = new $class;
            if ($extractor instanceof DocumentExtractor)
            {
                $extractor->registerMimeTypes();
            }
        }
        closedir($dir);
    }

    /**
     * Index a document and its discussion in one call.
     *
     * This exists as a possible optimisation point; subclasses may override it.
     * Both steps are always attempted.
     *
     * @param int $docId
     * @param string $textFile Path of the file containing the text to index.
     * @param string $title
     * @param string $version
     * @return boolean True only when both indexDocument() and indexDiscussion()
     *                 report success. migrateDocuments() relies on this value
     *                 to decide whether the migrated row can be deleted.
     */
    protected function indexDocumentAndDiscussion($docId, $textFile, $title, $version)
    {
        $documentIndexed = $this->indexDocument($docId, $textFile, $title, $version);
        $discussionIndexed = $this->indexDiscussion($docId);

        return $documentIndexed && $discussionIndexed;
    }

    /**
     * Delete a document's entry from the index_files queue table. This is
     * normally called once the document has been processed.
     *
     * @param int $docid
     * @param string|false $reason Optional. When not false, the removal is logged with this text.
     * @param string $level Optional. Name of the log method to call. Defaults to 'debug'.
     */
    public static function unqueueDocument($docid, $reason=false, $level='debug')
    {
        DBUtil::runQuery("DELETE FROM index_files WHERE document_id=$docid");

        if ($reason === false)
        {
            // No reason supplied: remove silently.
            return;
        }

        global $default;
        $message = "Indexer: removing document $docid from the queue - $reason";
        $default->log->$level($message);
    }

    /**
     * Run a query on the index. Concrete subclasses must implement this function.
     *
     * @param string $query
     * @return array
     */
    public abstract function query($query);

	/**
	 * Encode an integer as a letters-only string that is easy to compare and
	 * to convert back.
	 *
	 * The value is left-padded with zeros to 14 characters, then each digit is
	 * replaced by a letter: 0 => a, 1 => b, ... 9 => j.
	 *
	 * @param int $int
	 * @return string
	 */
	public static function longToString($int)
    {
        $width = 14;

        // Values already longer than $width are left unpadded.
        $padded = str_pad("$int", $width, '0', STR_PAD_LEFT);

        return strtr($padded, '0123456789', 'abcdefghij');
    }

    /**
     * Decode a string produced by longToString() back into a number.
     *
     * Each letter a..j is mapped back to the digit 0..9 and the resulting
     * numeric string is converted by adding zero.
     *
     * @param string $str
     * @return int
     */
	public static function stringToLong($str)
    {
        $digits = strtr($str, 'abcdefghij', '0123456789');

        return $digits + 0;
    }

    /**
     * Hook for optimising the index. The base implementation only records the
     * current time in the 'luceneOptimisationDate' system setting.
     *
     * Subclasses that perform a real optimisation must override this method
     * and call the parent so the timestamp is still recorded.
     *
     */
    public function optimise()
    {
        $optimisedAt = time();
        KTUtil::setSystemSetting('luceneOptimisationDate', $optimisedAt);
    }

    /**
     * Shuts down the indexer. The base implementation is intentionally empty;
     * subclasses may override it.
     *
     */
    public function shutdown()
    {
    	// nothing to do in the base class
    }

    /**
     * Returns the name of the indexer. Concrete subclasses must implement this function.
     *
     * @return string
     */
    public abstract function getDisplayName();


    /**
     * Returns the number of non-deleted documents in the index.
     * Concrete subclasses must implement this function.
     *
     * @return int
     */
    public abstract function getDocumentsInIndex();

    /**
     * Reports whether the given document is present in the index.
     * Concrete subclasses must implement this function.
     *
     * NOTE(review): presumably returns a boolean - confirm against the implementations.
     *
     * @param int $documentId
     */
    public abstract function isDocumentIndexed($documentId);

    /**
     * Returns the path to the index directory, as configured under
     * 'indexer/luceneDirectory'.
     *
     * @return string
     */
    public function getIndexDirectory()
    {
        return KTConfig::getSingleton()->get('indexer/luceneDirectory');
    }
}

?>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -