📄 indexercore.inc.php
字号:
$this->extractorCache = array();
$this->debug = $config->get('indexer/debug', true);
$this->hookCache = array();
$this->generalHookCache = array();
$this->extractorPath = $config->get('indexer/extractorPath', 'extractors');
$this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks');
$this->loadExtractorStatus();
}
/**
* Get the list of enabled extractors
*
*/
private function loadExtractorStatus()
{
    // Only rows flagged active=1 in mime_extractors count as enabled.
    $activeRows = DBUtil::getResultArray("SELECT id, name FROM mime_extractors WHERE active=1");

    // The list is rebuilt from scratch each time so no earlier names survive a reload.
    $this->enabledExtractors = array();
    foreach ($activeRows as $row) {
        array_push($this->enabledExtractors, $row['name']);
    }
}
/**
 * Tell whether a name appears in the list of enabled extractors.
 *
 * @param string $extractor Name to look for in $this->enabledExtractors
 * @return boolean
 */
private function isExtractorEnabled($extractor)
{
    // Loose (non-strict) search, the same comparison in_array() performs by default.
    $position = array_search($extractor, $this->enabledExtractors);

    return $position !== false;
}
/**
* Returns a reference to the main class
*
* @return Indexer
*/
public static function get()
{
    // A function-local static holds the single shared instance between calls.
    static $singleton = null;

    if (!is_null($singleton)) {
        return $singleton;
    }

    // The concrete indexer class is named by configuration and loaded on first use.
    $coreClass = KTConfig::getSingleton()->get('indexer/coreClass');
    require_once('indexing/indexers/' . $coreClass . '.inc.php');

    if (!class_exists($coreClass)) {
        throw new Exception("Class '$coreClass' does not exist.");
    }

    $singleton = new $coreClass;

    return $singleton;
}
/**
 * Abstract method: each concrete indexer class has to provide the implementation.
 *
 * NOTE(review): presumably removes the given document from the search index;
 * this is inferred from the name only, confirm against the implementations.
 *
 * @param mixed $docid Identifier of the document (exact type not visible here)
 */
public abstract function deleteDocument($docid);
/**
* Remove the association of all extractors to mime types on the database.
*
*/
public function clearExtractors()
{
    global $default;

    // Detach the mime types first, then remove the extractor rows themselves.
    $statements = array(
        "update mime_types set extractor_id=null",
        "delete from mime_extractors",
    );
    foreach ($statements as $statement) {
        DBUtil::runQuery($statement);
    }

    if ($this->debug) {
        $default->log->debug('clearExtractors');
    }
}
/**
* Look up the name of the extractor class based on the mime type.
*
* @param string $type
* @return string
*/
public static function resolveExtractor($type)
{
    global $default;

    // NOTE(review): $type is placed into the SQL text unescaped, so callers
    // must only pass trusted values.
    $sql = "select extractor from mime_types where filetypes='$type'";
    $class = DBUtil::getOneResultKey($sql,'extractor');
    if (PEAR::isError($class))
    {
        $default->log->error("resolveExtractor: cannot resolve $type");
        // The PEAR error object itself is handed back so the caller can inspect it.
        return $class;
    }

    // This method is static, so there is no $this to read a debug flag from
    // (doing so throws on PHP 7.1+). The 'indexer/debug' setting is read from
    // the configuration directly instead.
    $config = KTConfig::getSingleton();
    if ($config->get('indexer/debug', true))
    {
        $default->log->debug(sprintf(_kt("resolveExtractor: Resolved '%s' from mime type '%s'."), $class, $type));
    }

    return $class;
}
/**
* Return all the discussion text.
*
* @param int $docid
* @return string
*/
public static function getDiscussionText($docid)
{
    // Joins each discussion thread of the document to the comments whose ids
    // lie between the thread's first and last comment id.
    $query = "SELECT
dc.subject, dc.body
FROM
discussion_threads dt
INNER JOIN discussion_comments dc ON dc.thread_id=dt.id AND dc.id BETWEEN dt.first_comment_id AND dt.last_comment_id
WHERE
dt.document_id=$docid";

    // One "subject\nbody\n" chunk per comment, glued together without a separator.
    $chunks = array();
    foreach (DBUtil::getResultArray($query) as $comment) {
        $chunks[] = $comment['subject'] . "\n" . $comment['body'] . "\n";
    }

    return implode('', $chunks);
}
/**
* Schedule the indexing of a document.
*
* @param mixed $document A document object or a numeric document id
* @param string $what
*/
public static function index($document, $what='A')
{
    global $default;

    // A numeric argument is treated as a document id and resolved to a document first.
    if (is_numeric($document))
    {
        $document = Document::get($document+0);
    }

    if (PEAR::isError($document))
    {
        $default->log->error("index: Could not index document: " .$document->getMessage());
        return;
    }

    // Anything that is not an object at this point cannot answer getId();
    // log and stop instead of failing with a fatal error.
    if (!is_object($document))
    {
        $default->log->error("index: Could not index document: invalid document reference");
        return;
    }

    $document_id = $document->getId();

    // There may be no session at all (e.g. when run outside a web request).
    // empty() covers both a missing session and a missing key without raising
    // a notice; user id 1 is the fallback, as before.
    $userid = empty($_SESSION['userID']) ? 1 : $_SESSION['userID'];

    // we dequeue the document so that there are no issues when enqueuing
    Indexer::unqueueDocument($document_id);

    // enqueue item
    $sql = "INSERT INTO index_files(document_id, user_id, what) VALUES($document_id, $userid, '$what')";
    DBUtil::runQuery($sql);

    $default->log->debug("index: Queuing indexing of $document_id");
}
/**
 * Add one to the indexed-document counter.
 *
 * The value is read and written back through getIndexedDocumentCount()
 * and updateIndexedDocumentCount().
 */
private static function incrementCount()
{
    $next = (int) Indexer::getIndexedDocumentCount() + 1;
    Indexer::updateIndexedDocumentCount($next);
}
/**
 * Read the 'indexedDocumentCount' system setting as an integer.
 *
 * @return int The stored value cast to int; 0 is passed as the default to the settings lookup
 */
public static function getIndexedDocumentCount()
{
    return (int) KTUtil::getSystemSetting('indexedDocumentCount', 0);
}
/**
 * Store a new value in the 'indexedDocumentCount' system setting.
 *
 * @param int $cnt Value to store (defaults to 0)
 */
public static function updateIndexedDocumentCount($cnt = 0)
{
    $settingName = 'indexedDocumentCount';
    KTUtil::setSystemSetting($settingName, $cnt);
}
/**
 * Set processdate back to null on every row of index_files.
 */
public static function reindexQueue()
{
    DBUtil::runQuery("UPDATE index_files SET processdate = null");
}
/**
 * Set processdate and status_msg back to null for one document's index_files rows.
 *
 * @param int $documentId Document id; it is placed into the SQL text as-is
 */
public static function reindexDocument($documentId)
{
    DBUtil::runQuery("UPDATE index_files SET processdate=null, status_msg=null WHERE document_id=$documentId");
}
/**
 * Empty the indexing queue and re-queue every document whose status_id is 1.
 *
 * Queue rows are attributed to the session user, or to user id 1 when no
 * session user is available.
 */
public static function indexAll()
{
    // There may be no session at all (e.g. when run outside a web request).
    // empty() covers both a missing session and a missing key without raising
    // a notice.
    $userid = empty($_SESSION['userID']) ? 1 : $_SESSION['userID'];

    $sql = "DELETE FROM index_files";
    DBUtil::runQuery($sql);

    $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'A' FROM documents WHERE status_id=1 and id not in (select document_id from index_files)";
    DBUtil::runQuery($sql);
}
/**
 * Queue every not-yet-queued document with status_id 1 whose full_path lies below a folder.
 *
 * @param Folder|FolderProxy $folder Folder whose contents should be queued
 * @throws Exception When $folder is neither a Folder nor a FolderProxy
 */
public static function indexFolder($folder)
{
    // Reject a wrong argument before doing anything else.
    if (!$folder instanceof Folder && !$folder instanceof FolderProxy)
    {
        throw new Exception('Folder expected');
    }

    // There may be no session at all (e.g. when run outside a web request).
    // empty() covers both a missing session and a missing key without raising
    // a notice; user id 1 is the fallback, as before.
    $userid = empty($_SESSION['userID']) ? 1 : $_SESSION['userID'];

    $full_path = $folder->getFullPath();

    // NOTE(review): the path is placed into the LIKE pattern unescaped, so a
    // quote, % or _ in a folder name changes the statement. Confirm how paths
    // are constrained before relying on this.
    $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'A' FROM documents WHERE full_path like '{$full_path}/%' AND status_id=1 and id not in (select document_id from index_files)";
    DBUtil::runQuery($sql);
}
/**
* Clear out the scheduling of documents that no longer exist.
*
*/
public static function clearoutDeleted()
{
    global $default;

    // Two kinds of queue rows are dropped: those whose document has
    // status_id 3, and those whose document id has no row in documents.
    $purgeSql = 'DELETE FROM
index_files
WHERE
document_id in (SELECT d.id FROM documents AS d WHERE d.status_id=3) OR
NOT EXISTS(SELECT index_files.document_id FROM documents WHERE index_files.document_id=documents.id)';
    DBUtil::runQuery($purgeSql);

    $default->log->debug("Indexer::clearoutDeleted: removed documents from indexing queue that have been deleted");
}
/**
* Check if a document is scheduled to be indexed
*
* @param mixed $document This may be a document or document id
* @return boolean
*/
public static function isDocumentScheduled($document)
{
    // Accept a Document or a numeric id; anything else is reported as not scheduled.
    if ($document instanceof Document) {
        $docid = $document->getId();
    } elseif (is_numeric($document)) {
        $docid = $document;
    } else {
        return false;
    }

    // Any matching row in index_files means the document is in the queue.
    $rows = DBUtil::getResultArray("SELECT 1 FROM index_files WHERE document_id=$docid");

    return count($rows) > 0;
}
/**
* Filters text removing redundant characters such as continuous newlines and spaces.
*
* @param string $filename
*/
private function filterText($filename)
{
    // The file is rewritten in place; the return value is true when the
    // write succeeded and false on any failure.
    $content = file_get_contents($filename);
    if ($content === false)
    {
        // Unreadable file: stop here instead of overwriting it with an empty string.
        return false;
    }

    // shrink what is being stored.
    // Every run of CR, LF, tab and space characters becomes a single space.
    // This is the state the previous repeated five-pattern replacement
    // converged to, reached in one pass and without a loose string
    // comparison deciding when to stop.
    $content = preg_replace('/[\r\n\t ]+/', ' ', $content);
    if ($content === null)
    {
        // preg_replace() signals an internal error with null; leave the file untouched.
        return false;
    }

    return file_put_contents($filename, $content) !== false;
}
/**
* Load hooks for text extraction process.
*
*/
private function loadExtractorHooks()
{
    global $default;

    // Both caches are rebuilt from scratch on every call.
    $this->generalHookCache = array();
    $this->mimeHookCache = array();

    $dir = opendir(SearchHelper::correctPath($this->hookPath));
    if ($dir === false)
    {
        // Without a readable hook directory there is nothing to load.
        $default->log->error("loadExtractorHooks: cannot open hook directory '{$this->hookPath}'");
        return;
    }

    while (($file = readdir($dir)) !== false)
    {
        // Only files named <Something>Hook.inc.php are considered.
        if (substr($file,-12) != 'Hook.inc.php')
        {
            continue;
        }

        require_once($this->hookPath . '/' . $file);

        // The class name is the file name without the trailing '.inc.php'.
        $class = substr($file, 0, -8);
        if (!class_exists($class))
        {
            continue;
        }

        $hook = new $class;

        // The check is on the instance: testing the class-name string against
        // instanceof is always false and would discard every hook.
        if (!($hook instanceof ExtractorHook))
        {
            continue;
        }

        // A null answer registers the hook as general; otherwise it is filed
        // under each mime type it names.
        $mimeTypes = $hook->registerMimeTypes();
        if (is_null($mimeTypes))
        {
            // Stored by value: binding by reference to the loop variable would
            // make every cached entry follow the hook created last.
            $this->generalHookCache[] = $hook;
        }
        else
        {
            foreach($mimeTypes as $type)
            {
                $this->mimeHookCache[$type][] = $hook;
            }
        }
    }

    closedir($dir);
}
/**
* This is a refactored function to execute the hooks.
*
* @param DocumentExtractor $extractor
* @param string $phase
* @param string $mimeType Optional. If set, indicates which hooks must be used, else assume general.
*/
private function executeHook($extractor, $phase, $mimeType = null)
{
    // No mime type selects the general hooks; otherwise only hooks filed
    // under that exact mime type are used, if there are any.
    if (is_null($mimeType)) {
        $selected = $this->generalHookCache;
    } elseif (array_key_exists($mimeType, $this->mimeHookCache)) {
        $selected = $this->mimeHookCache[$mimeType];
    } else {
        $selected = array();
    }

    if (empty($selected)) {
        return;
    }

    // $phase is the name of the method invoked on each hook.
    foreach ($selected as $hook) {
        $hook->$phase($extractor);
    }
}
/**
 * Run the indexer diagnostics and report whether they all pass.
 *
 * A lock file in the cache directory marks that problems were already
 * reported: errors are logged only while it is absent, it is touched whenever
 * a problem is found, and it is removed once every check passes.
 *
 * @param boolean $simple When true, only diagnose() is consulted and the extractor diagnostics are skipped
 * @return boolean True when no problem was reported
 */
private function doesDiagnosticsPass($simple=false)
{
    global $default;

    $config =& KTConfig::getSingleton();
    $lockFile = $config->get('cache/cacheDirectory') . '/index.log.lock';

    // Core check first: a non-null answer is the problem description.
    $coreProblem = $this->diagnose();
    if (!is_null($coreProblem)) {
        $firstReport = !is_file($lockFile);
        if ($firstReport) {
            $default->log->error(_kt('Indexer problem: ') . $coreProblem);
        }
        touch($lockFile);

        return false;
    }

    if ($simple) {
        return true;
    }

    // Per-extractor checks: each entry carries a 'name' and a 'diagnosis'.
    $extractorProblems = $this->diagnoseExtractors();
    if (!empty($extractorProblems)) {
        $firstReport = !is_file($lockFile);
        if ($firstReport) {
            foreach ($extractorProblems as $problem) {
                $default->log->error(sprintf(_kt('%s problem: %s'), $problem['name'],$problem['diagnosis']));
            }
        }
        touch($lockFile);

        return false;
    }

    // Everything passed: announce the recovery once and drop the marker.
    if (is_file($lockFile)) {
        $default->log->info(_kt('Issues with the indexer have been resolved!'));
        unlink($lockFile);
    }

    return true;
}
/**
* This does the initial mime type association between mime types and text extractors
*
*/
public function checkForRegisteredTypes()
{
    global $default;

    // we are only doing this once!
    $initRegistered = KTUtil::getSystemSetting('mimeTypesRegistered', false);
    if ($initRegistered)
    {
        return;
    }

    if ($this->debug) $default->log->debug('checkForRegisteredTypes: start');

    // Sets run_time to the current minute on every row of scheduler_tasks
    // (the statement has no WHERE clause).
    $date = date('Y-m-d H:i');
    $sql = "UPDATE scheduler_tasks SET run_time='$date'";
    DBUtil::runQuery($sql);

    $this->registerTypes(true);

    // Extractors switched off per operating system family.
    $disable = array(
        'windows'=>array('PSExtractor'),
        'unix' => array()
    );
    $disableForOS = OS_WINDOWS?$disable['windows']:$disable['unix'];

    if (!empty($disableForOS))
    {
        // Quoted, comma-separated list for the IN (...) clause.
        $nameList = '\'' . implode("','", $disableForOS) .'\'';
        $sql = "UPDATE mime_extractors SET active=0 WHERE name in ($nameList)";
        DBUtil::runQuery($sql);

        // One log line per extractor; the message previously interpolated a
        // variable that was never defined.
        foreach($disableForOS as $extractor)
        {
            $default->log->info("checkForRegisteredTypes: disabled '$extractor'");
        }
    }

    // Refresh the in-memory list of enabled extractors after the changes above.
    $this->loadExtractorStatus();

    if ($this->debug) $default->log->debug('checkForRegisteredTypes: done');

    KTUtil::setSystemSetting('mimeTypesRegistered', true);
}
/**
 * Append a "level: message" line to the running indexing history and store
 * the whole history as the document's status message in index_files.
 *
 * @param int $documentId Document whose index_files rows are updated
 * @param string $message Text to append
 * @param string $level Label written in front of the message
 */
private function updatePendingDocumentStatus($documentId, $message, $level)
{
    $this->indexingHistory = $this->indexingHistory . "\n{$level}: {$message}";

    // The accumulated history, not just the new line, is what gets stored.
    $escapedHistory = sanitizeForSQL($this->indexingHistory);
    DBUtil::runQuery("UPDATE index_files SET status_msg='$escapedHistory' WHERE document_id=$documentId");
}
/**
 * Flag that starts out false.
 *
 * NOTE(review): presumably asks the batch loop to start the current batch
 * again; the code that sets and reads it is not visible here, so confirm.
 */
private $restartCurrentBatch = false;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -