📄 indexercore.inc.php.svn-base
字号:
// NOTE(review): the statements down to the closing brace of indexDocuments() are the
// tail of a method whose beginning lies outside this excerpt; they are kept unchanged.
                                $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error');
                                $this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
                            }
                            $extractor->setIndexingStatus($indexStatus);
                        }
                    }
                    $this->executeHook($extractor, 'post_index', $mimeType);
                    $this->executeHook($extractor, 'post_index');
                } else {
                    $extractor->setExtractionStatus(false);
                    $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error');
                    $this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
                }
                $this->executeHook($extractor, 'post_extract', $mimeType);
                $this->executeHook($extractor, 'post_extract');
                if ($extractor->needsIntermediateSourceFile()) {
                    @unlink($sourceFile);
                }
                @unlink($targetFile);
            } else {
                $indexStatus = $this->indexDiscussion($docId);
                $removeFromQueue = $indexStatus;
            }
            if ($removeFromQueue) {
                Indexer::unqueueDocument($docId, sprintf(_kt("Done indexing docid: %d"),$docId));
            } else {
                if ($this->debug) $default->log->debug(sprintf(_kt("Document docid: %d was not removed from the queue as it looks like there was a problem with the extraction process"),$docId));
            }
        }
        if ($this->debug) $default->log->debug('indexDocuments: done');
        //unlink($indexLockFile);
    }

    /**
     * Migrate text stored in the document_text table into the index.
     *
     * The work is done in up to five passes, each reading at most
     * ceil($max / 5) rows ordered by document_id. For every row the text is
     * written to a temporary file and handed to indexDocumentAndDiscussion();
     * the row is deleted from document_text when that call reports success, or
     * when the owning document can no longer be loaded.
     *
     * A lock file ('migration.lock' in the configured cache directory) prevents
     * concurrent runs. Elapsed time and the number of rows read are accumulated
     * in the 'migrationTime' and 'migratedDocuments' system settings. When a
     * pass finds no rows, 'migrationComplete' is set and the 'Index Migration'
     * scheduler entry is removed.
     *
     * @param int|null $max Upper bound on rows handled per call. Defaults to the
     *                      'indexer/batchMigrateDocument' setting (500).
     * @return void
     */
    public function migrateDocuments($max=null)
    {
        global $default;

        $default->log->info(_kt('migrateDocuments: starting'));

        if (!$this->doesDiagnosticsPass(true)) {
            $default->log->info(_kt('migrateDocuments: stopping - diagnostics problem. The dashboard will provide more information.'));
            return;
        }

        if (KTUtil::getSystemSetting('migrationComplete') == 'true') {
            $default->log->info(_kt('migrateDocuments: stopping - migration is complete.'));
            return;
        }

        $config =& KTConfig::getSingleton();
        if (is_null($max)) {
            $max = $config->get('indexer/batchMigrateDocument',500);
        }

        // $tempPath was previously used below without ever being assigned. Read it
        // from the same setting updateDocumentIndex() uses for its temporary files.
        $tempPath = $config->get('urls/tmpDirectory');

        $lockFile = $config->get('cache/cacheDirectory') . '/migration.lock';
        if (is_file($lockFile)) {
            $default->log->info(_kt('migrateDocuments: stopping - migration lockfile detected.'));
            return;
        }
        touch($lockFile);

        // Record the start time only on the first run.
        $startTime = KTUtil::getSystemSetting('migrationStarted');
        if (is_null($startTime)) {
            KTUtil::setSystemSetting('migrationStarted', time());
        }

        // Split the batch into several smaller queries.
        $maxLoops = 5;
        $max = ceil($max / $maxLoops);

        $start =KTUtil::getBenchmarkTime();
        $noDocs = false;
        $numDocs = 0;

        for($loop=0;$loop<$maxLoops;$loop++) {
            $sql = "SELECT document_id, document_text FROM document_text ORDER BY document_id LIMIT $max";
            $result = DBUtil::getResultArray($sql);
            if (PEAR::isError($result)) {
                $default->log->info(_kt('migrateDocuments: db error'));
                break;
            }

            $docs = count($result);
            if ($docs == 0) {
                // Nothing left to migrate.
                $noDocs = true;
                break;
            }
            $numDocs += $docs;

            foreach($result as $docinfo) {
                $docId = $docinfo['document_id'];

                $document = Document::get($docId);
                if (PEAR::isError($document) || is_null($document)) {
                    // The document is gone, so its stored text is dropped too.
                    $sql = "DELETE FROM document_text WHERE document_id=$docId";
                    DBUtil::runQuery($sql);
                    $default->log->error(sprintf(_kt('migrateDocuments: Could not get document %d\'s document! Removing content!'),$docId));
                    continue;
                }

                $version = $document->getMajorVersionNumber() . '.' . $document->getMinorVersionNumber();

                $targetFile = tempnam($tempPath, 'ktindexer');
                if ($targetFile === false) {
                    $default->log->error(sprintf(_kt('migrateDocuments: Cannot create a temporary file for document id %d'), $docId));
                    continue;
                }

                if (file_put_contents($targetFile, $docinfo['document_text']) === false) {
                    $default->log->error(sprintf(_kt('migrateDocuments: Cannot write to \'%s\' for document id %d'), $targetFile, $docId));
                    // tempnam() has already created the file; do not leave it behind.
                    @unlink($targetFile);
                    continue;
                }
                // free memory asap ;)
                unset($docinfo['document_text']);

                $title = $document->getName();

                $indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title, $version);

                if ($indexStatus) {
                    $sql = "DELETE FROM document_text WHERE document_id=$docId";
                    DBUtil::runQuery($sql);
                } else {
                    $default->log->error(sprintf(_kt("migrateDocuments: Problem indexing document %d"), $docId));
                }

                @unlink($targetFile);
            }
        }

        @unlink($lockFile);

        $time = KTUtil::getBenchmarkTime() - $start;

        KTUtil::setSystemSetting('migrationTime', KTUtil::getSystemSetting('migrationTime',0) + $time);
        KTUtil::setSystemSetting('migratedDocuments', KTUtil::getSystemSetting('migratedDocuments',0) + $numDocs);

        $default->log->info(sprintf(_kt('migrateDocuments: stopping - done in %d seconds!'), $time));
        if ($noDocs) {
            $default->log->info(_kt('migrateDocuments: Completed!'));
            KTUtil::setSystemSetting('migrationComplete', 'true');
            schedulerUtil::deleteByName('Index Migration');
            $default->log->debug(_kt('migrateDocuments: Disabling \'Index Migration\' task by removing scheduler entry.'));
        }
    }

    /**
     * Index a document. The base class must override this function.
*
     * @param int $docId
     * @param string $textFile Path of the file holding the text to index.
     * @param string $title
     * @param string $version
     * @return mixed Treated as a success flag by the callers in this class.
     */
    protected abstract function indexDocument($docId, $textFile, $title, $version);

    /**
     * Re-index a document from text supplied by the caller.
     *
     * The text is written to a temporary file in the 'urls/tmpDirectory'
     * location, passed to indexDocument() together with the document's
     * description and version, and the temporary file is removed afterwards.
     *
     * @param int $docId
     * @param string $text
     * @return mixed The result of indexDocument(), or false when the document
     *               cannot be loaded or the temporary file cannot be written.
     */
    public function updateDocumentIndex($docId, $text)
    {
        global $default;

        // Same guard migrateDocuments() applies before touching the document.
        $document = Document::get($docId);
        if (PEAR::isError($document) || is_null($document)) {
            $default->log->error(sprintf(_kt("updateDocumentIndex: Could not get document %d"), $docId));
            return false;
        }

        $config = KTConfig::getSingleton();
        $tempPath = $config->get("urls/tmpDirectory");
        $tempFile = tempnam($tempPath,'ud_');
        if ($tempFile === false) {
            $default->log->error(sprintf(_kt("updateDocumentIndex: Cannot create a temporary file for document id %d"), $docId));
            return false;
        }

        if (file_put_contents($tempFile, $text) === false) {
            $default->log->error(sprintf(_kt("updateDocumentIndex: Cannot write to '%s' for document id %d"), $tempFile, $docId));
            // tempnam() has already created the file; do not leave it behind.
            @unlink($tempFile);
            return false;
        }

        $title = $document->getDescription();
        $version = $document->getVersion();

        $result = $this->indexDocument($docId, $tempFile, $title, $version);

        if (file_exists($tempFile)) {
            unlink($tempFile);
        }

        return $result;
    }

    /**
     * Index a discussion. The base class must override this function.
     *
     * @param int $docId
     */
    protected abstract function indexDiscussion($docId);

    /**
     * Diagnose the indexer. e.g. Check that the indexing server is running.
     *
     */
    public abstract function diagnose();

    /**
     * Diagnose the extractors and the hooks.
     *
     * @return array Diagnoses keyed by class name, as built by _diagnose().
     */
    public function diagnoseExtractors()
    {
        $diagnosis = $this->_diagnose($this->extractorPath, 'DocumentExtractor', 'Extractor.inc.php');
        $diagnosis = array_merge($diagnosis, $this->_diagnose($this->hookPath, 'Hook', 'Hook.inc.php'));

        return $diagnosis;
    }

    /**
     * Load every class file in a directory and collect the problems each
     * instance reports through its diagnose() method.
     *
     * Files starting with '.' are ignored. Files without the expected suffix,
     * files that do not declare the expected class, disabled classes, classes
     * of the wrong type and classes supporting no mime types are skipped.
     *
     * @param string $path      Directory to scan.
     * @param string $baseclass Class every loaded object must be an instance of.
     * @param string $extension Required file name suffix, e.g. 'Extractor.inc.php'.
     * @return array Map of class name => array('name' => display name,
     *               'diagnosis' => diagnosis); only non-empty diagnoses appear.
     */
    private function _diagnose($path, $baseclass, $extension)
    {
        global $default;

        $diagnoses = array();

        $dir = opendir(SearchHelper::correctPath($path));
        if ($dir === false) {
            // Without this guard readdir()/closedir() would be called on a non-resource.
            $default->log->error(sprintf(_kt("diagnose: could not open directory '%s'."), $path));
            return $diagnoses;
        }

        // Negative offset so substr() returns the trailing strlen($extension) characters.
        $extlen = - strlen($extension);

        while (($file = readdir($dir)) !== false) {
            if (substr($file,0,1) == '.') {
                continue;
            }
            if (substr($file,$extlen) != $extension) {
                $default->log->error(sprintf(_kt("diagnose: '%s' does not have extension '%s'."), $file, $extension));
                continue;
            }

            require_once($path . '/' . $file);

            // The class name is the file name without the trailing '.inc.php'.
            $class = substr($file, 0, -8);
            if (!class_exists($class)) {
                $default->log->error(sprintf(_kt("diagnose: class '%s' does not exist."), $class));
                continue;
            }

            if (!$this->isExtractorEnabled($class)) {
                $default->log->debug(sprintf(_kt("diagnose: extractor '%s' is disabled."), $class));
                continue;
            }

            $extractor = new $class();
            if (!is_a($extractor, $baseclass)) {
                // NOTE(review): the message names DocumentExtractor even when $baseclass
                // is 'Hook'; left unchanged because the text is a translation key.
                $default->log->error(sprintf(_kt("diagnose(): '%s' is not of type DocumentExtractor"), $class));
                continue;
            }

            $types = $extractor->getSupportedMimeTypes();
            if (empty($types)) {
                if ($this->debug) $default->log->debug(sprintf(_kt("diagnose: class '%s' does not support any types."), $class));
                continue;
            }

            $diagnosis=$extractor->diagnose();
            if (empty($diagnosis)) {
                continue;
            }
            $diagnoses[$class] = array(
                'name'=>$extractor->getDisplayName(),
                'diagnosis'=>$diagnosis
            );
        }
        closedir($dir);

        return $diagnoses;
    }

    /**
     * Register the extractor types.
     *
     * Loads every '*Extractor.inc.php' file in the extractor directory and calls
     * registerMimeTypes() on each class that is a DocumentExtractor.
     *
     * @param boolean $clear Optional. Defaults to false. When true, the
     *                       registered extractors are cleared first.
     */
    public function registerTypes($clear=false)
    {
        if ($clear) {
            $this->clearExtractors();
        }

        $dir = opendir(SearchHelper::correctPath($this->extractorPath));
        if ($dir === false) {
            // Nothing can be registered when the directory cannot be read.
            global $default;
            $default->log->error(sprintf(_kt("registerTypes: could not open directory '%s'."), $this->extractorPath));
            return;
        }

        while (($file = readdir($dir)) !== false) {
            if (substr($file,-17) == 'Extractor.inc.php') {
                require_once($this->extractorPath . '/' . $file);

                // The class name is the file name without the trailing '.inc.php'.
                $class = substr($file, 0, -8);
                if (!class_exists($class)) {
                    // if the class does not exist, we can't do anything.
                    continue;
                }

                $extractor = new $class;
                if ($extractor instanceof DocumentExtractor) {
                    $extractor->registerMimeTypes();
                }
            }
        }
        closedir($dir);
    }

    /**
     * This is used as a possible optimisation effort. It may be overridden in that case.
     *
     * Both steps always run. The combined result is returned because
     * migrateDocuments() uses it to decide whether the stored text can be
     * deleted; previously nothing was returned, so that check always failed.
     *
     * @param int $docId
     * @param string $textFile
     * @param string $title
     * @param string $version
     * @return boolean True when both indexDocument() and indexDiscussion()
     *                 returned a truthy status.
     */
    protected function indexDocumentAndDiscussion($docId, $textFile, $title, $version)
    {
        $documentIndexed = $this->indexDocument($docId, $textFile, $title, $version);
        $discussionIndexed = $this->indexDiscussion($docId);

        return $documentIndexed && $discussionIndexed;
    }

    /**
     * Remove the document from the queue. This is normally called when it has been processed.
*
     * Deletes the document's row from index_files and, when a reason is
     * given, writes a log entry at the requested level.
     *
     * @param int $docid
     * @param string|false $reason Text appended to the log entry; false disables logging.
     * @param string $level Name of the logger method to call.
     */
    public static function unqueueDocument($docid, $reason=false, $level='debug')
    {
        DBUtil::runQuery("DELETE FROM index_files WHERE document_id=$docid");

        if ($reason === false) {
            return;
        }

        global $default;
        $default->log->$level("Indexer: removing document $docid from the queue - $reason");
    }

    /**
     * Run a query on the index.
     *
     * @param string $query
     * @return array
     */
    public abstract function query($query);

    /**
     * Encode an integer as a fixed-width, letter-only string.
     *
     * The value is left-padded with zeros to 14 characters and each digit
     * 0-9 is then replaced by the letter a-j.
     *
     * @param int $int
     * @return string
     */
    public static function longToString($int)
    {
        $digits = str_split('0123456789');
        $letters = str_split('abcdefghij');

        $padded = str_pad("$int", 14, '0', STR_PAD_LEFT);

        return str_replace($digits, $letters, $padded);
    }

    /**
     * Decode a string produced by longToString() back into a number.
     *
     * @param string $str
     * @return int
     */
    public static function stringToLong($str)
    {
        $letters = str_split('abcdefghij');
        $digits = str_split('0123456789');

        $numeric = str_replace($letters, $digits, $str);

        // Adding zero turns the digit string into a number.
        return $numeric + 0;
    }

    /**
     * Hook for index optimisation. Overriding methods must call the parent,
     * which records the time of the optimisation run.
     *
     */
    public function optimise()
    {
        $now = time();
        KTUtil::setSystemSetting('luceneOptimisationDate', $now);
    }

    /**
     * Shuts down the indexer. There is nothing to do by default.
     *
     */
    public function shutdown()
    {
        // do nothing generally
    }

    /**
     * Returns the name of the indexer.
     *
     * @return string
     */
    public abstract function getDisplayName();

    /**
     * Returns the number of non-deleted documents in the index.
     *
     * @return int
     */
    public abstract function getDocumentsInIndex();

    public abstract function isDocumentIndexed($documentId);

    /**
     * Returns the configured index directory ('indexer/luceneDirectory').
     *
     * @return string
     */
    public function getIndexDirectory()
    {
        return KTConfig::getSingleton()->get('indexer/luceneDirectory');
    }
}
?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -