📄 indexercore.inc.php
字号:
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error');
$this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
}
$extractor->setIndexingStatus($indexStatus);
}
}
$this->executeHook($extractor, 'post_index', $mimeType);
$this->executeHook($extractor, 'post_index');
}
else
{
$extractor->setExtractionStatus(false);
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error');
$this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
}
$this->executeHook($extractor, 'post_extract', $mimeType);
$this->executeHook($extractor, 'post_extract');
if ($extractor->needsIntermediateSourceFile())
{
@unlink($sourceFile);
}
@unlink($targetFile);
}
else
{
$indexStatus = $this->indexDiscussion($docId);
$removeFromQueue = $indexStatus;
}
if ($removeFromQueue)
{
Indexer::unqueueDocument($docId, sprintf(_kt("Done indexing docid: %d"),$docId));
}
else
{
if ($this->debug) $default->log->debug(sprintf(_kt("Document docid: %d was not removed from the queue as it looks like there was a problem with the extraction process"),$docId));
}
}
if ($this->debug) $default->log->debug('indexDocuments: done');
//unlink($indexLockFile);
}
/**
 * Migrate legacy text from the document_text table into the index.
 *
 * Runs up to five batches per call. Each batch selects the lowest document
 * ids still present in document_text, writes each row's text to a temporary
 * file, indexes it through indexDocumentAndDiscussion() and deletes the row
 * once indexing reports success. Rows whose document cannot be loaded are
 * deleted outright. A lock file in the cache directory stops a second run
 * from starting while one is in progress. Once a batch finds no rows, the
 * 'migrationComplete' system setting is set and the 'Index Migration'
 * scheduler entry is removed.
 *
 * @param int|null $max Maximum number of rows to process in this call. When
 *                      null, the 'indexer/batchMigrateDocument' config value
 *                      (default 500) is used.
 * @return void
 */
public function migrateDocuments($max=null)
{
    global $default;

    $default->log->info(_kt('migrateDocuments: starting'));

    if (!$this->doesDiagnosticsPass(true))
    {
        $default->log->info(_kt('migrateDocuments: stopping - diagnostics problem. The dashboard will provide more information.'));
        return;
    }

    if (KTUtil::getSystemSetting('migrationComplete') == 'true')
    {
        $default->log->info(_kt('migrateDocuments: stopping - migration is complete.'));
        return;
    }

    $config = KTConfig::getSingleton();

    if (is_null($max))
    {
        $max = $config->get('indexer/batchMigrateDocument',500);
    }

    // Previously $tempPath was never assigned, so tempnam() was handed an
    // undefined variable. Use the same temp directory setting as
    // updateDocumentIndex().
    $tempPath = $config->get('urls/tmpDirectory');

    $lockFile = $config->get('cache/cacheDirectory') . '/migration.lock';
    if (is_file($lockFile))
    {
        $default->log->info(_kt('migrateDocuments: stopping - migration lockfile detected.'));
        return;
    }
    touch($lockFile);

    $startTime = KTUtil::getSystemSetting('migrationStarted');
    if (is_null($startTime))
    {
        KTUtil::setSystemSetting('migrationStarted', time());
    }

    // Split the requested amount over a fixed number of smaller batches.
    // ceil() returns a float, so cast before it is interpolated into LIMIT.
    $maxLoops = 5;
    $max = (int) ceil($max / $maxLoops);

    $start = KTUtil::getBenchmarkTime();
    $noDocs = false;
    $numDocs = 0;

    for ($loop = 0; $loop < $maxLoops; $loop++)
    {
        $sql = "SELECT
                    document_id, document_text
                FROM
                    document_text
                ORDER BY document_id
                LIMIT $max";
        $result = DBUtil::getResultArray($sql);
        if (PEAR::isError($result))
        {
            $default->log->info(_kt('migrateDocuments: db error'));
            break;
        }

        $docs = count($result);
        if ($docs == 0)
        {
            // Nothing left in document_text: the migration is finished.
            $noDocs = true;
            break;
        }

        // NOTE(review): rows that fail to index stay in document_text, so
        // later batches in the same call select them again and $numDocs
        // counts them each time.
        $numDocs += $docs;

        foreach ($result as $docinfo)
        {
            $docId = $docinfo['document_id'];

            $document = Document::get($docId);
            if (PEAR::isError($document) || is_null($document))
            {
                // The text has no usable document behind it; drop the row.
                $sql = "DELETE FROM document_text WHERE document_id=$docId";
                DBUtil::runQuery($sql);
                $default->log->error(sprintf(_kt('migrateDocuments: Could not get document %d\'s document! Removing content!'),$docId));
                continue;
            }

            $version = $document->getMajorVersionNumber() . '.' . $document->getMinorVersionNumber();

            $targetFile = tempnam($tempPath, 'ktindexer');
            if (file_put_contents($targetFile, $docinfo['document_text']) === false)
            {
                $default->log->error(sprintf(_kt('migrateDocuments: Cannot write to \'%s\' for document id %d'), $targetFile, $docId));
                // Do not leave the file created by tempnam() behind.
                @unlink($targetFile);
                continue;
            }

            // free memory asap ;)
            unset($docinfo['document_text']);

            $title = $document->getName();

            $indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title, $version);
            if ($indexStatus)
            {
                // Indexed successfully, so the legacy row is no longer needed.
                $sql = "DELETE FROM document_text WHERE document_id=$docId";
                DBUtil::runQuery($sql);
            }
            else
            {
                $default->log->error(sprintf(_kt("migrateDocuments: Problem indexing document %d"), $docId));
            }

            @unlink($targetFile);
        }
    }

    @unlink($lockFile);

    // Accumulate running totals across invocations.
    $time = KTUtil::getBenchmarkTime() - $start;
    KTUtil::setSystemSetting('migrationTime', KTUtil::getSystemSetting('migrationTime',0) + $time);
    KTUtil::setSystemSetting('migratedDocuments', KTUtil::getSystemSetting('migratedDocuments',0) + $numDocs);

    $default->log->info(sprintf(_kt('migrateDocuments: stopping - done in %d seconds!'), $time));

    if ($noDocs)
    {
        $default->log->info(_kt('migrateDocuments: Completed!'));
        KTUtil::setSystemSetting('migrationComplete', 'true');
        schedulerUtil::deleteByName('Index Migration');
        $default->log->debug(_kt('migrateDocuments: Disabling \'Index Migration\' task by removing scheduler entry.'));
    }
}
/**
 * Index a document. Concrete subclasses must implement this method.
 *
 * @param int $docId Identifier of the document being indexed.
 * @param string $textFile Path of a file holding the document's text; callers in this class pass a temporary file.
 * @param string $title Title supplied by the caller for the document.
 * @param string $version Version supplied by the caller for the document.
 *
 * NOTE(review): the return type is not visible from this declaration.
 * updateDocumentIndex() hands the result straight back to its caller -
 * confirm the expected type against the implementations.
 */
protected abstract function indexDocument($docId, $textFile, $title, $version);
/**
 * Re-index a document from text supplied by the caller.
 *
 * The text is written to a temporary file in the 'urls/tmpDirectory'
 * location, passed to indexDocument() together with the document's
 * description and version, and the temporary file is removed afterwards.
 *
 * @param int $docId Identifier of the document to index.
 * @param string $text Text to index for the document.
 * @return mixed The result of indexDocument(), or false when the document
 *               cannot be loaded or the temporary file cannot be written.
 */
public function updateDocumentIndex($docId, $text)
{
    global $default;

    // Same validity check migrateDocuments() applies: without it a missing
    // document leads to a method call on an error object or null.
    $document = Document::get($docId);
    if (PEAR::isError($document) || is_null($document))
    {
        $default->log->error(sprintf(_kt('updateDocumentIndex: Could not get document %d'), $docId));
        return false;
    }

    $config = KTConfig::getSingleton();
    $tempPath = $config->get("urls/tmpDirectory");
    $tempFile = tempnam($tempPath,'ud_');

    // tempnam() and file_put_contents() both return false on failure;
    // indexing a missing or partial file would silently store bad content.
    if ($tempFile === false || file_put_contents($tempFile, $text) === false)
    {
        $default->log->error(sprintf(_kt('updateDocumentIndex: Cannot write temporary file for document id %d'), $docId));
        if ($tempFile !== false && file_exists($tempFile))
        {
            unlink($tempFile);
        }
        return false;
    }

    $title = $document->getDescription();
    $version = $document->getVersion();

    $result = $this->indexDocument($docId, $tempFile, $title, $version);

    // The indexer may already have removed the file, so check first.
    if (file_exists($tempFile))
    {
        unlink($tempFile);
    }

    return $result;
}
/**
 * Index the discussion attached to a document. Concrete subclasses must
 * implement this method.
 *
 * @param int $docId Identifier of the document whose discussion is indexed.
 *
 * NOTE(review): the return type is not visible from this declaration;
 * callers in this class use the result as a success flag - confirm against
 * the implementations.
 */
protected abstract function indexDiscussion($docId);
/**
 * Diagnose the indexer, e.g. check that the indexing server is running.
 * Concrete subclasses must implement this method.
 *
 * NOTE(review): the return value is not visible from this declaration -
 * confirm it against the implementations.
 */
public abstract function diagnose();
/**
 * Collect diagnostics for the extractors and the hooks.
 *
 * Runs _diagnose() over the extractor directory and then over the hook
 * directory, and merges both result sets into one array.
 *
 * @return array Merged diagnostics from both directories.
 */
public function diagnoseExtractors()
{
    $extractorReport = $this->_diagnose($this->extractorPath, 'DocumentExtractor', 'Extractor.inc.php');
    $hookReport = $this->_diagnose($this->hookPath, 'Hook', 'Hook.inc.php');

    return array_merge($extractorReport, $hookReport);
}
/**
 * Load every class file in a directory and collect the diagnostics it reports.
 *
 * Files starting with '.' are ignored. Every other file must end in
 * $extension; the class name is the file name without its trailing
 * '.inc.php'. A class is skipped when it does not exist, is disabled, is not
 * an instance of $baseclass, supports no mime types, or reports an empty
 * diagnosis.
 *
 * @param string $path Directory to scan.
 * @param string $baseclass Class or interface each loaded class must be an instance of.
 * @param string $extension File name suffix expected on every file, e.g. 'Extractor.inc.php'.
 * @return array Map of class name => array('name' => display name, 'diagnosis' => diagnosis).
 */
private function _diagnose($path, $baseclass, $extension)
{
    global $default;

    $diagnoses = array();

    $dir = opendir(SearchHelper::correctPath($path));
    if ($dir === false)
    {
        // opendir() returns false on failure and readdir() cannot iterate
        // that value, so report the problem instead of entering the loop.
        $default->log->error(sprintf(_kt("diagnose: could not open directory '%s'."), $path));
        return $diagnoses;
    }

    // Negative offset so substr() returns the tail of the file name.
    $extlen = - strlen($extension);

    while (($file = readdir($dir)) !== false)
    {
        if (substr($file,0,1) == '.')
        {
            continue;
        }

        if (substr($file,$extlen) != $extension)
        {
            $default->log->error(sprintf(_kt("diagnose: '%s' does not have extension '%s'."), $file, $extension));
            continue;
        }

        require_once($path . '/' . $file);

        // Strip the 8-character '.inc.php' suffix to get the class name.
        $class = substr($file, 0, -8);
        if (!class_exists($class))
        {
            $default->log->error(sprintf(_kt("diagnose: class '%s' does not exist."), $class));
            continue;
        }

        if (!$this->isExtractorEnabled($class))
        {
            $default->log->debug(sprintf(_kt("diagnose: extractor '%s' is disabled."), $class));
            continue;
        }

        $extractor = new $class();
        if (!is_a($extractor, $baseclass))
        {
            // Report the type that was actually required; this method is
            // also used for hooks, not only for DocumentExtractor classes.
            $default->log->error(sprintf(_kt("diagnose(): '%s' is not of type %s"), $class, $baseclass));
            continue;
        }

        $types = $extractor->getSupportedMimeTypes();
        if (empty($types))
        {
            if ($this->debug) $default->log->debug(sprintf(_kt("diagnose: class '%s' does not support any types."), $class));
            continue;
        }

        // An empty diagnosis is not recorded.
        $diagnosis = $extractor->diagnose();
        if (empty($diagnosis))
        {
            continue;
        }

        $diagnoses[$class] = array(
            'name'=>$extractor->getDisplayName(),
            'diagnosis'=>$diagnosis
        );
    }

    closedir($dir);

    return $diagnoses;
}
/**
 * Register the mime types of every extractor found in the extractor directory.
 *
 * Each file whose name ends in 'Extractor.inc.php' is loaded. The class named
 * after the file (file name without '.inc.php') is instantiated and, when it
 * is a DocumentExtractor, asked to register its mime types.
 *
 * @param boolean $clear Optional. When true, clearExtractors() is called first. Defaults to false.
 * @return void
 */
public function registerTypes($clear=false)
{
    global $default;

    if ($clear)
    {
        $this->clearExtractors();
    }

    $dir = opendir(SearchHelper::correctPath($this->extractorPath));
    if ($dir === false)
    {
        // opendir() returns false on failure and readdir() cannot iterate
        // that value, so report the problem instead of entering the loop.
        $default->log->error(sprintf(_kt("registerTypes: could not open directory '%s'."), $this->extractorPath));
        return;
    }

    while (($file = readdir($dir)) !== false)
    {
        if (substr($file,-17) == 'Extractor.inc.php')
        {
            require_once($this->extractorPath . '/' . $file);

            // Strip the 8-character '.inc.php' suffix to get the class name.
            $class = substr($file, 0, -8);
            if (!class_exists($class))
            {
                // if the class does not exist, we can't do anything.
                continue;
            }

            $extractor = new $class;
            if ($extractor instanceof DocumentExtractor)
            {
                $extractor->registerMimeTypes();
            }
        }
    }

    closedir($dir);
}
/**
 * Index a document and its discussion in one call.
 *
 * This exists as a possible optimisation point: a subclass may override it
 * to do both steps more efficiently.
 *
 * Both steps are always attempted. The combined status is returned because
 * migrateDocuments() uses the return value to decide whether the legacy text
 * row can be deleted; returning nothing made every migration look failed.
 *
 * @param int $docId Identifier of the document to index.
 * @param string $textFile Path of the file holding the document's text.
 * @param string $title Title passed through to indexDocument().
 * @param string $version Version passed through to indexDocument().
 * @return boolean True only when both indexDocument() and indexDiscussion() report success.
 */
protected function indexDocumentAndDiscussion($docId, $textFile, $title, $version)
{
    $documentIndexed = $this->indexDocument($docId, $textFile, $title, $version);
    // Called unconditionally, as before, even when the document step failed.
    $discussionIndexed = $this->indexDiscussion($docId);

    return $documentIndexed && $discussionIndexed;
}
/**
 * Delete a document's row from the index_files queue table, typically once
 * the document has been processed.
 *
 * @param int $docid Identifier of the document to remove from the queue.
 * @param string|false $reason Optional text appended to the log entry; nothing is logged when false.
 * @param string $level Name of the logger method used for the entry. Defaults to 'debug'.
 */
public static function unqueueDocument($docid, $reason=false, $level='debug')
{
    global $default;

    DBUtil::runQuery("DELETE FROM index_files WHERE document_id=$docid");

    // No reason given means the caller does not want a log entry.
    if ($reason === false)
    {
        return;
    }

    $default->log->$level("Indexer: removing document $docid from the queue - $reason");
}
/**
 * Run a query on the index. Concrete subclasses must implement this method.
 *
 * @param string $query Query to run against the index.
 * @return array Query results; the element structure is defined by the implementation.
 */
public abstract function query($query);
/**
 * Encode an integer as a fixed-width string of letters that compares in the
 * same order as the number and can be decoded again with stringToLong().
 *
 * The value is left-padded with zeros to 14 characters and every digit 0-9
 * is replaced by the letter a-j at the same position.
 *
 * @param int $int
 * @return string
 */
public static function longToString($int)
{
    $width = 14;
    $padded = str_pad("$int", $width, '0', STR_PAD_LEFT);

    return strtr($padded, '0123456789', 'abcdefghij');
}
/**
 * Decode a string produced by longToString() back into a number.
 *
 * Every letter a-j is replaced by the digit 0-9 at the same position and the
 * result is converted with numeric addition.
 *
 * @param string $str
 * @return int
 */
public static function stringToLong($str)
{
    $digits = strtr($str, 'abcdefghij', '0123456789');

    // Adding zero converts the numeric string to a number.
    return $digits + 0;
}
/**
 * Hook for optimising the index. Subclasses that override this method must
 * call the parent implementation, which records the current time in the
 * 'luceneOptimisationDate' system setting.
 */
public function optimise()
{
    $now = time();
    KTUtil::setSystemSetting('luceneOptimisationDate', $now);
}
/**
 * Shut down the indexer. This default implementation does nothing.
 */
public function shutdown()
{
// intentionally empty: there is nothing to shut down by default
}
/**
 * Return the name of the indexer. Concrete subclasses must implement this method.
 *
 * @return string
 */
public abstract function getDisplayName();
/**
 * Return the number of non-deleted documents in the index. Concrete
 * subclasses must implement this method.
 *
 * @return int
 */
public abstract function getDocumentsInIndex();
/**
 * Report whether a document is present in the index. Concrete subclasses
 * must implement this method.
 *
 * @param int $documentId Identifier of the document to look up.
 *
 * NOTE(review): presumably returns a boolean - the return type is not
 * visible from this declaration; confirm against the implementations.
 */
public abstract function isDocumentIndexed($documentId);
/**
 * Return the path of the index directory, as configured under
 * 'indexer/luceneDirectory'.
 *
 * @return string
 */
public function getIndexDirectory()
{
    return KTConfig::getSingleton()->get('indexer/luceneDirectory');
}
}
?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -