⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indexercore.inc.php.svn-base

📁 PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。
💻 SVN-BASE
📖 第 1 页 / 共 4 页
字号:
    /**
     * Flags the batch that is currently being processed for restart.
     *
     * indexDocuments() checks this flag for every document in the batch; when
     * it is set the document is unqueued and re-submitted via
     * Indexer::index($docId, 'A') instead of being indexed.
     */
    public function restartBatch()
    {
        $this->restartCurrentBatch = true;
    }

    /**
     * Stores a status message against a pending document and writes the same
     * message to the system log.
     *
     * @param int $documentId
     * @param string $message
     * @param string $level This may be info, error, debug
     */
    private function logPendingDocumentInfoStatus($documentId, $message, $level)
    {
        $this->updatePendingDocumentStatus($documentId, $message, $level);

        global $default;

        switch ($level)
        {
            case 'debug':
                // debug messages only reach the log when debugging is enabled
                if ($this->debug)
                {
                    $default->log->debug($message);
                }
                break;
            default:
                // the level name doubles as the name of the logger method
                $default->log->$level($message);
        }
    }

    /**
     * Loads and instantiates the extractor class with the given name.
     *
     * The class is expected to live in
     * SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php'.
     *
     * @param string $extractorClass Name of the extractor class.
     * @return DocumentExtractor|null null when no class name is supplied.
     * @throws Exception when the include file does not exist, when the file
     *                   does not define the class, or when the class is not a
     *                   DocumentExtractor.
     */
    public function getExtractor($extractorClass)
    {
        if (empty($extractorClass))
        {
            return null;
        }

        $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php';
        if (!file_exists($includeFile))
        {
            throw new Exception("Extractor file does not exist: $includeFile");
        }

        require_once($includeFile);

        if (!class_exists($extractorClass))
        {
            // report the requested class name (was an undefined $classname)
            throw new Exception("Extractor '$extractorClass' not defined in file: $includeFile");
        }

        $extractor = new $extractorClass();

        if (!($extractor instanceof DocumentExtractor))
        {
            // report the requested class name (was an undefined $classname)
            throw new Exception("Class $extractorClass was expected to be of type DocumentExtractor");
        }

        return $extractor;
    }

    /**
     * Returns the rows of the indexing queue (index_files) that belong to
     * documents with status_id = 1, ordered by index date.
     *
     * @param boolean $problemItemsOnly When true, only entries that carry a
     *                                  non-empty status message are returned;
     *                                  when false, only entries without a
     *                                  status message are returned.
     * @return mixed Whatever DBUtil::getResultArray() returns for the query.
     */
    public static function getIndexingQueue($problemItemsOnly=true)
    {
        // The two variants of the query differ only in this predicate.
        if ($problemItemsOnly)
        {
            $statusFilter = "(iff.status_msg IS NOT NULL AND iff.status_msg <> '')";
        }
        else
        {
            $statusFilter = "(iff.status_msg IS NULL or iff.status_msg = '')";
        }

        $sql = "SELECT
                    iff.document_id, iff.indexdate, mt.filetypes, mt.mimetypes, me.name as extractor, iff.what, iff.status_msg, dcv.filename
                FROM
                    index_files iff
                    INNER JOIN documents d ON iff.document_id=d.id
                    INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
                    INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id
                    INNER JOIN mime_types mt ON dcv.mime_id=mt.id
                    LEFT JOIN mime_extractors me ON mt.extractor_id=me.id
                WHERE
                    $statusFilter AND d.status_id=1
                ORDER BY iff.indexdate ";

        $aResult = DBUtil::getResultArray($sql);

        return $aResult;
    }

    /**
     * Returns the queue entries that have no status message recorded.
     *
     * @return mixed See getIndexingQueue().
     */
    public static function getPendingIndexingQueue()
    {
        return Indexer::getIndexingQueue(false);
    }

    /**
     * Recomputes the indexer statistics and diagnostics and stores them,
     * serialized, in the system settings 'indexerStats', 'indexerDiagnostics'
     * and 'extractorDiagnostics'.
     */
    public function updateIndexStats()
    {
        $optimisationDate = KTUtil::getSystemSetting('luceneOptimisationDate', '');

        $noOptimisation = false;
        if ($optimisationDate == '')
        {
            $optimisationDate = _kt('N/A');
            $optimisationPeriod = $optimisationDate;
        }
        else
        {
            $optimisationPeriod = KTUtil::computePeriodToDate($optimisationDate, null, true);
            // flag when the last optimisation is more than two days old
            $noOptimisation = $optimisationPeriod['days'] > 2;
            $optimisationPeriod = $optimisationPeriod['str'];
            $optimisationDate = date('Y-m-d H:i:s', $optimisationDate);
        }

        $indexingDate = KTUtil::getSystemSetting('luceneIndexingDate', '');
        if ($indexingDate == '')
        {
            $indexingDate = _kt('N/A');
            $indexingPeriod = $indexingDate;
        }
        else
        {
            $indexingPeriod = KTUtil::computePeriodToDate($indexingDate);
            $indexingDate = date('Y-m-d H:i:s', $indexingDate);
        }

        $index = Indexer::get();
        $docsInIndex = $index->getDocumentsInIndex();

        // we are only interested in documents that are active
        $sql = "SELECT count(*) as docsInQueue FROM index_files i inner join documents d on i.document_id = d.id where (i.status_msg is null or i.status_msg = '') and d.status_id=1";
        $docsInQueue = DBUtil::getOneResultKey($sql, 'docsInQueue');

        // An entry is in error only when it carries a non-empty status message.
        // Both conditions must hold (AND); with OR, entries whose message is the
        // empty string would be counted here as well as in docsInQueue.
        $sql = "SELECT count(*) as errorsInQueue FROM index_files i inner join documents d on i.document_id = d.id  where (i.status_msg is not null and i.status_msg <> '') and d.status_id=1";
        $errorsInQueue = DBUtil::getOneResultKey($sql, 'errorsInQueue');

        $sql = "SELECT count(*) as docsInRepository FROM documents where status_id=1";
        $docsInRepository = DBUtil::getOneResultKey($sql, 'docsInRepository');

        if ($docsInRepository == 0)
        {
            // nothing in the repository: avoid dividing by zero
            $indexingCoverage = '0.00%';
            $queueCoverage = $indexingCoverage;
        }
        else
        {
            // compute indexing coverage
            $indexingCoverage = _kt('Not Available');
            if (is_numeric($docsInIndex))
            {
                $indexingCoverage = ($docsInIndex * 100) / $docsInRepository;
                $indexingCoverage = number_format($indexingCoverage, 2, '.',',') . '%';
            }

            // compute queue coverage
            $queueCoverage = _kt('Not Available');
            if (is_numeric($docsInQueue))
            {
                $queueCoverage = ($docsInQueue * 100) / $docsInRepository;
                $queueCoverage = number_format($queueCoverage, 2, '.',',') . '%';
            }
        }

        $stats = array(
            'optimisationDate'=>$optimisationDate,
            'optimisationPeriod'=>$optimisationPeriod,
            'indexingDate'=>$indexingDate,
            'indexingPeriod'=>$indexingPeriod,
            'docsInIndex'=>$docsInIndex,
            'docsInQueue'=>$docsInQueue,
            'errorsInQueue'=>$errorsInQueue,
            'docsInRepository'=>$docsInRepository,
            'indexingCoverage'=>$indexingCoverage,
            'queueCoverage'=>$queueCoverage,
            'noOptimisation'=>$noOptimisation
        );

        KTUtil::setSystemSetting('indexerStats', serialize($stats));

        $indexer = Indexer::get();

        $diagnosis = $indexer->diagnose();
        KTUtil::setSystemSetting('indexerDiagnostics', serialize($diagnosis));

        $extractorDiagnosis = $indexer->diagnoseExtractors();
        KTUtil::setSystemSetting('extractorDiagnostics', serialize($extractorDiagnosis));
    }

    /**
     * The main function that may be called repeatedly to index documents.
     *
     * @param int $max Default 20
     */
    public function indexDocuments($max=null)
    {
        global $default;
        $config =& KTConfig::getSingleton();

        /*$indexLockFile = $config->get('cache/cacheDirectory') . '/main.index.lock';
        if (is_file($indexLockFile))
        {
            $default->log->info('indexDocuments: main.index.lock seems to exist. it could be that the indexing is still underway.');
            $default->log->info('indexDocuments: Remove "' . $indexLockFile . '" if the indexing is not running or extend the frequency at which the background task runs!');
            return;
        }
        touch($indexLockFile);*/

        $this->checkForRegisteredTypes();

        if ($this->debug) $default->log->debug('indexDocuments: start');
        if (!$this->doesDiagnosticsPass())
        {
            //unlink($indexLockFile);
            if ($this->debug) $default->log->debug('indexDocuments: stopping - diagnostics problem. 
The dashboard will provide more information.');    		return;    	}    	if (is_null($max))    	{			$max = $config->get('indexer/batchDocuments',20);    	}    	$this->loadExtractorHooks();    	Indexer::clearoutDeleted();    	$date = date('Y-m-d H:i:s');    	// identify the indexers that must run        // mysql specific limit!        $sql = "SELECT        			iff.document_id, mt.filetypes, mt.mimetypes, me.name as extractor, iff.what				FROM					index_files iff					INNER JOIN documents d ON iff.document_id=d.id					INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id					INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id					INNER JOIN mime_types mt ON dcv.mime_id=mt.id					LEFT JOIN mime_extractors me ON mt.extractor_id=me.id 				WHERE 					(iff.processdate IS NULL or iff.processdate < date_sub('$date', interval 1 day)) AND dmv.status_id=1				ORDER BY indexdate 					LIMIT $max";        $result = DBUtil::getResultArray($sql);        if (PEAR::isError($result))        {        	//unlink($indexLockFile);        	if ($this->debug) $default->log->debug('indexDocuments: stopping - db error');        	return;        }        KTUtil::setSystemSetting('luceneIndexingDate', time());        // bail if no work to do        if (count($result) == 0)        {        	//unlink($indexLockFile);        	if ($this->debug) $default->log->debug('indexDocuments: stopping - no work to be done');            return;        }        // identify any documents that need indexing and mark them        // so they are not taken in a followup run		$ids = array();		foreach($result as $docinfo)		{			$ids[] = $docinfo['document_id'];		}		// mark the documents as being processed        $ids=implode(',',$ids);        $sql = "UPDATE index_files SET processdate='$date' WHERE document_id in ($ids)";        DBUtil::runQuery($sql);        $extractorCache = array();        $storageManager = KTStorageManagerUtil::getSingleton();        $tempPath = 
$config->get("urls/tmpDirectory");        foreach($result as $docinfo)        {        // increment indexed documents count        Indexer::incrementCount();        	$docId=$docinfo['document_id'];        	$extension=$docinfo['filetypes'];        	$mimeType=$docinfo['mimetypes'];        	$extractorClass=$docinfo['extractor'];        	$indexDocument = in_array($docinfo['what'], array('A','C'));        	$indexDiscussion = in_array($docinfo['what'], array('A','D'));			$this->indexingHistory = '';        	$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass), 'debug');        	if (empty($extractorClass))        	{        		/*        		if no extractor is found and we don't need to index discussions, then we can remove the item from the queue.        		*/        		if ($indexDiscussion)        		{        			$indexDocument = false;        			$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Not indexing docid: %d content because extractor could not be resolve. Still indexing discussion."), $docId), 'info');        		}        		else        		{        			Indexer::unqueueDocument($docId, sprintf(_kt("No extractor for docid: %d"),$docId));        			continue;        		}        	}        	else        	{        		/*        		If an extractor is available, we must ensure it is enabled.        		 
*/	        	if (!$this->isExtractorEnabled($extractorClass))				{					$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info');					continue;				}        	}        	if ($this->debug)        	{        		$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"),$docId), 'info');        	}        	$document = Document::get($docId);        	if (PEAR::isError($document))        	{        		Indexer::unqueueDocument($docId,sprintf(_kt("indexDocuments: Cannot resolve document id %d: %s."),$docId, $document->getMessage()), 'error');        		continue;        	}        	if ($this->restartCurrentBatch)			{			    Indexer::unqueueDocument($docId);        		Indexer::index($docId, 'A');			    continue;			}        	$filename = $document->getFileName();        	if (substr($filename,0,1) == '~' || substr($filename,-1) == '~')        	{        		Indexer::unqueueDocument($docId,sprintf(_kt("indexDocuments: Filename for document id %d starts with a tilde (~). This is assumed to be a temporary file. This is ignored."),$docId), 'error');        		continue;        	}        	$removeFromQueue = true;        	if ($indexDocument)        	{        		if (array_key_exists($extractorClass, $extractorCache))        		{        			$extractor = $extractorCache[$extractorClass];        		}        		else        		{        			$extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass);        		}				if (!($extractor instanceof DocumentExtractor))				{					$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass), 'error');					continue;				}        		$version = $document->getMajorVersionNumber() . '.' . 
$document->getMinorVersionNumber();        		$sourceFile = $storageManager->temporaryFile($document);        		if (empty($sourceFile) || !is_file($sourceFile))        		{        			Indexer::unqueueDocument($docId,sprintf(_kt("indexDocuments: source file '%s' for document %d does not exist."),$sourceFile,$docId), 'error');        			continue;        		}        		if ($extractor->needsIntermediateSourceFile())        		{        			//$extension =  pathinfo($document->getFileName(), PATHINFO_EXTENSION);        			$intermediate = $tempPath . '/'. $docId . '.' . $extension;        			$result = @copy($sourceFile, $intermediate);        			if ($result === false)        			{        				$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"),$docId), 'error');        				// problem. lets try again later. probably permission related. log the issue.        				continue;        			}        			$sourceFile = $intermediate;        		}        		$targetFile = tempnam($tempPath, 'ktindexer');        		$extractor->setSourceFile($sourceFile);        		$extractor->setMimeType($mimeType);        		$extractor->setExtension($extension);        		$extractor->setTargetFile($targetFile);        		$extractor->setDocument($document);        		$extractor->setIndexingStatus(null);        		$extractor->setExtractionStatus(null);        		$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile), 'debug');        		$this->executeHook($extractor, 'pre_extract');				$this->executeHook($extractor, 'pre_extract', $mimeType);				$removeFromQueue = false;        		if ($extractor->extractTextContent())        		{        			// the extractor may need to create another target file        			$targetFile = $extractor->getTargetFile();        			$extractor->setExtractionStatus(true);        			$this->executeHook($extractor, 'pre_index');					
$this->executeHook($extractor, 'pre_index', $mimeType);					$title = $document->getName();        			if ($indexDiscussion)        			{        				if (!$this->filterText($targetFile))        				{        					$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"),$docId), 'error');        				}						else						{	        				$indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title, $version);    	    				$removeFromQueue = $indexStatus;        					if (!$indexStatus)        					{        						$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId), 'error');        					}        					$extractor->setIndexingStatus($indexStatus);						}        			}        			else        			{        				if (!$this->filterText($targetFile))        				{        					$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"),$docId), 'error');        				}						else						{							$indexStatus = $this->indexDocument($docId, $targetFile, $title, $version);							$removeFromQueue = $indexStatus;							if (!$indexStatus)							{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -