📄 indexercore.inc.php.svn-base
字号:
$this->extractorCache = array();
        $this->debug = $config->get('indexer/debug', true);
        $this->hookCache = array();
        $this->generalHookCache = array();
        $this->extractorPath = $config->get('indexer/extractorPath', 'extractors');
        $this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks');

        $this->loadExtractorStatus();
    }

    /**
     * Get the list of enabled extractors.
     *
     * Reads the names of all rows in mime_extractors with active=1 into
     * $this->enabledExtractors. If the query fails, the list is left empty
     * and the failure is logged instead of iterating over the error object.
     */
    private function loadExtractorStatus()
    {
        global $default;

        $this->enabledExtractors = array();

        $sql = "SELECT id, name FROM mime_extractors WHERE active=1";
        $rows = DBUtil::getResultArray($sql);

        if (PEAR::isError($rows)) {
            $default->log->error('loadExtractorStatus: could not load enabled extractors: ' . $rows->getMessage());
            return;
        }
        if (!is_array($rows)) {
            // Nothing usable came back; keep the empty list.
            return;
        }

        foreach ($rows as $row) {
            $this->enabledExtractors[] = $row['name'];
        }
    }

    /**
     * Tell whether the named extractor is in the enabled list.
     *
     * @param string $extractor Extractor name as stored in mime_extractors.name.
     * @return boolean
     */
    private function isExtractorEnabled($extractor)
    {
        // Strict comparison so numeric-looking names cannot match each other loosely.
        return in_array($extractor, $this->enabledExtractors, true);
    }

    /**
     * Returns a reference to the main class
     *
     * The concrete class name comes from the 'indexer/coreClass' config value;
     * its file is loaded from indexing/indexers/ and a single instance is
     * created on first use and reused afterwards.
     *
     * @return Indexer
     * @throws Exception When no class is configured or the class is missing after the include.
     */
    public static function get()
    {
        static $singleton = null;

        if (!is_null($singleton)) {
            return $singleton;
        }

        $config = KTConfig::getSingleton();
        $classname = $config->get('indexer/coreClass');
        if (empty($classname)) {
            // Without this check the require_once below fails fatally on '.inc.php'.
            throw new Exception("Indexer core class is not configured (indexer/coreClass).");
        }

        require_once('indexing/indexers/' . $classname . '.inc.php');

        if (!class_exists($classname)) {
            throw new Exception("Class '$classname' does not exist.");
        }

        $singleton = new $classname;

        return $singleton;
    }

    /**
     * Implemented by the concrete indexer.
     * NOTE(review): presumably removes the document from the index - confirm against subclasses.
     *
     * @param int $docid
     */
    public abstract function deleteDocument($docid);

    /**
     * Remove the association of all extractors to mime types on the database.
     *
     * Sets mime_types.extractor_id to null for every row and then deletes
     * every row from mime_extractors.
     */
    public function clearExtractors()
    {
        global $default;

        $sql = "update mime_types set extractor_id=null";
        DBUtil::runQuery($sql);

        $sql = "delete from mime_extractors";
        DBUtil::runQuery($sql);

        if ($this->debug) {
            $default->log->debug('clearExtractors');
        }
    }

    /**
     * Look up the name of the extractor class based on the mime type.
*
     * The lookup matches $type against mime_types.filetypes. On failure the
     * error is logged and the PEAR error object itself is returned.
     *
     * @param string $type
     * @return string Extractor class name (or the PEAR error from the lookup).
     */
    public static function resolveExtractor($type)
    {
        global $default;

        // Escape the value before embedding it in the query; sanitizeForSQL is
        // the helper this class already uses when writing status messages.
        $safeType = sanitizeForSQL($type);
        $sql = "select extractor from mime_types where filetypes='$safeType'";
        $class = DBUtil::getOneResultKey($sql,'extractor');

        if (PEAR::isError($class)) {
            $default->log->error("resolveExtractor: cannot resolve $type");
            return $class;
        }

        // This method is static, so there is no $this to read the debug flag
        // from. Read the same 'indexer/debug' setting the constructor uses.
        // NOTE(review): assumes the constructor's $config is KTConfig::getSingleton() - confirm.
        $config = KTConfig::getSingleton();
        if ($config->get('indexer/debug', true)) {
            $default->log->debug(sprintf(_kt("resolveExtractor: Resolved '%s' from mime type '%s'."), $class, $type));
        }

        return $class;
    }

    /**
     * Return all the discussion text.
     *
     * Concatenates the subject and body of every comment that belongs to a
     * discussion thread of the document, each followed by a newline. Returns
     * an empty string when there are no comments or the query fails.
     *
     * @param int $docid
     * @return string
     */
    public static function getDiscussionText($docid)
    {
        // The id is embedded in the query, so force it to an integer first.
        $docid = (int) $docid;

        $sql = "SELECT dc.subject, dc.body FROM discussion_threads dt"
             . " INNER JOIN discussion_comments dc ON dc.thread_id=dt.id"
             . " AND dc.id BETWEEN dt.first_comment_id AND dt.last_comment_id"
             . " WHERE dt.document_id=$docid";
        $records = DBUtil::getResultArray($sql);

        if (PEAR::isError($records) || !is_array($records)) {
            return '';
        }

        $text = '';
        foreach ($records as $record) {
            $text .= $record['subject'] . "\n" . $record['body'] . "\n";
        }

        return $text;
    }

    /**
     * Schedule the indexing of a document.
*
     * Any existing queue entry for the document is removed first, then a new
     * row is inserted into index_files for the current session user.
     *
     * @param mixed $document Document object, or a numeric document id.
     * @param string $what Value stored in index_files.what (default 'A').
     *                     NOTE(review): presumably selects what to index - confirm.
     */
    public static function index($document, $what='A')
    {
        global $default;

        if (is_numeric($document)) {
            $document = Document::get($document+0);
        }

        if (PEAR::isError($document)) {
            $default->log->error("index: Could not index document: " .$document->getMessage());
            return;
        }

        if (!is_object($document)) {
            // Neither an id nor a document: calling getId() below would be fatal.
            $default->log->error("index: Could not index document: no valid document supplied");
            return;
        }

        $document_id = (int) $document->getId();
        $userid = self::resolveQueueUserId();
        // Escaped with the helper this class already uses for SQL string values.
        $what = sanitizeForSQL($what);

        // we dequeue the document so that there are no issues when enqueuing
        Indexer::unqueueDocument($document_id);

        // enqueue item
        $sql = "INSERT INTO index_files(document_id, user_id, what) VALUES($document_id, $userid, '$what')";
        DBUtil::runQuery($sql);

        $default->log->debug("index: Queuing indexing of $document_id");
    }

    /**
     * Work out which user id to record on queue entries.
     *
     * Uses the session's userID when one is set, otherwise falls back to 1.
     * The value is cast to an integer because it is embedded in SQL.
     *
     * @return int
     */
    private static function resolveQueueUserId()
    {
        $userid = isset($_SESSION['userID']) ? (int) $_SESSION['userID'] : 0;

        return ($userid === 0) ? 1 : $userid;
    }

    /**
     * Add one to the indexed-document counter kept in the system settings.
     */
    private static function incrementCount()
    {
        // Get count from system settings
        $current = (int) Indexer::getIndexedDocumentCount();

        Indexer::updateIndexedDocumentCount($current + 1);
    }

    /**
     * Read the 'indexedDocumentCount' system setting (0 when it is not set).
     *
     * @return int
     */
    public static function getIndexedDocumentCount()
    {
        return (int) KTUtil::getSystemSetting('indexedDocumentCount', 0);
    }

    /**
     * Store the 'indexedDocumentCount' system setting.
     *
     * @param int $cnt
     */
    public static function updateIndexedDocumentCount($cnt = 0)
    {
        KTUtil::setSystemSetting('indexedDocumentCount', $cnt);
    }

    /**
     * Clear the process date of every queue entry.
     */
    public static function reindexQueue()
    {
        $sql = "UPDATE index_files SET processdate = null";
        DBUtil::runQuery($sql);
    }

    /**
     * Clear the process date and status message of one document's queue entry.
     *
     * @param int $documentId
     */
    public static function reindexDocument($documentId)
    {
        // The id is embedded in the query, so force it to an integer first.
        $documentId = (int) $documentId;

        $sql = "UPDATE index_files SET processdate=null, status_msg=null WHERE document_id=$documentId";
        DBUtil::runQuery($sql);
    }

    /**
     * Empty the queue and re-add every document with status_id=1 for the current user.
     */
    public static function indexAll()
    {
        $userid = self::resolveQueueUserId();

        $sql = "DELETE FROM index_files";
        DBUtil::runQuery($sql);

        $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'A' FROM documents WHERE status_id=1 and id not in (select document_id from index_files)";
        DBUtil::runQuery($sql);
    }

    /**
     * Queue every document with status_id=1 below a folder that is not queued yet.
     *
     * Documents are selected by matching documents.full_path against the
     * folder's full path followed by '/'.
     *
     * @param Folder|FolderProxy $folder
     * @throws Exception When $folder is neither a Folder nor a FolderProxy.
     */
    public static function indexFolder($folder)
    {
        if (!$folder instanceof Folder && !$folder instanceof FolderProxy) {
            throw new Exception('Folder expected');
        }

        $userid = self::resolveQueueUserId();

        // Folder names may contain quotes; escape before embedding in the query.
        // NOTE(review): LIKE wildcards (% and _) inside the path are still
        // treated as wildcards, as they were before.
        $full_path = sanitizeForSQL($folder->getFullPath());

        $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'A' FROM documents WHERE full_path like '{$full_path}/%' AND status_id=1 and id not in (select document_id from index_files)";
        DBUtil::runQuery($sql);
    }

    /**
     * Clear out the scheduling of documents that no longer exist.
     *
     * Removes queue entries whose document has status_id=3 or has no row in
     * the documents table at all.
     */
    public static function clearoutDeleted()
    {
        global $default;

        $sql = 'DELETE FROM index_files WHERE document_id in (SELECT d.id FROM documents AS d WHERE d.status_id=3) OR NOT EXISTS(SELECT index_files.document_id FROM documents WHERE index_files.document_id=documents.id)';
        DBUtil::runQuery($sql);

        $default->log->debug("Indexer::clearoutDeleted: removed documents from indexing queue that have been deleted");
    }

    /**
     * Check if a document is scheduled to be indexed
     *
     * @param mixed $document This may be a document or document id
     * @return boolean False when the argument is neither, or when the lookup fails.
     */
    public static function isDocumentScheduled($document)
    {
        if (is_numeric($document)) {
            $docid = $document;
        } elseif ($document instanceof Document) {
            $docid = $document->getId();
        } else {
            return false;
        }

        $sql = "SELECT 1 FROM index_files WHERE document_id=$docid";
        $rows = DBUtil::getResultArray($sql);

        if (PEAR::isError($rows) || !is_array($rows)) {
            // A failed lookup must not be counted as "one row found".
            return false;
        }

        return count($rows) > 0;
    }

    /**
     * Filters text removing redundant characters such as continuous newlines and spaces.
     *
     * Every run of carriage returns, newlines, tabs and spaces in the file is
     * replaced by a single space, and the file is rewritten in place to shrink
     * what is being stored.
     *
     * @param string $filename
     * @return boolean True when the filtered text was written back, false on
     *                 a read, filter or write failure.
     */
    private function filterText($filename)
    {
        $content = file_get_contents($filename);
        if ($content === false) {
            // Do not overwrite the file with nothing when it could not be read.
            return false;
        }

        // One pass gives the same result the repeated pairwise replacements
        // converged to, without depending on a loose string comparison to stop.
        $content = preg_replace('/[\r\n\t ]+/', ' ', $content);
        if (is_null($content)) {
            return false;
        }

        return file_put_contents($filename, $content) !== false;
    }

    /**
     * Load hooks for text extraction process.
*
     * Every file in the hook directory whose name ends in 'Hook.inc.php' is
     * included; the class named after the file (minus '.inc.php') is
     * instantiated and kept only if it is an ExtractorHook. Hooks whose
     * registerMimeTypes() returns null go into the general cache, the others
     * are filed under each mime type they return.
     */
    private function loadExtractorHooks()
    {
        $this->generalHookCache = array();
        $this->mimeHookCache = array();

        $dir = opendir(SearchHelper::correctPath($this->hookPath));
        if ($dir === false) {
            // Hook directory is not readable: leave both caches empty.
            return;
        }

        while (($file = readdir($dir)) !== false) {
            if (substr($file,-12) != 'Hook.inc.php') {
                continue;
            }

            require_once($this->hookPath . '/' . $file);

            // Strip the trailing '.inc.php' to get the class name.
            $class = substr($file, 0, -8);
            if (!class_exists($class)) {
                continue;
            }

            $hook = new $class;

            // Test the instance, not the class-name string: a string is never
            // an instance of anything, which used to discard every hook.
            if (!($hook instanceof ExtractorHook)) {
                continue;
            }

            // Store the object itself. Storing a reference to $hook would make
            // every cache entry follow the variable to the last hook loaded.
            $mimeTypes = $hook->registerMimeTypes();
            if (is_null($mimeTypes)) {
                $this->generalHookCache[] = $hook;
                continue;
            }

            foreach ($mimeTypes as $type) {
                $this->mimeHookCache[$type][] = $hook;
            }
        }

        closedir($dir);
    }

    /**
     * This is a refactored function to execute the hooks.
     *
     * Calls the method named by $phase on each selected hook, passing the
     * extractor. Does nothing when no hooks are registered for the selection.
     *
     * @param DocumentExtractor $extractor
     * @param string $phase Name of the hook method to call.
     * @param string $mimeType Optional. If set, indicates which hooks must be used, else assume general.
     */
    private function executeHook($extractor, $phase, $mimeType = null)
    {
        if (is_null($mimeType)) {
            $hooks = $this->generalHookCache;
        } elseif (isset($this->mimeHookCache[$mimeType])) {
            // isset also copes with the mime cache not having been loaded yet.
            $hooks = $this->mimeHookCache[$mimeType];
        } else {
            $hooks = array();
        }

        if (empty($hooks)) {
            return;
        }

        foreach ($hooks as $hook) {
            $hook->$phase($extractor);
        }
    }

    /**
     * Run the indexer diagnostics and report whether they pass.
     *
     * A lock file in the cache directory records that a problem has already
     * been logged, so the same problem is not logged on every run. The file is
     * created when a problem is detected and removed again (with an info log
     * entry) once the full diagnostics pass.
     *
     * @param boolean $simple When true, only diagnose() is checked and the
     *                        per-extractor diagnostics are skipped.
     * @return boolean True when the checked diagnostics report no problem.
     */
    private function doesDiagnosticsPass($simple=false)
    {
        global $default;

        $config = KTConfig::getSingleton();
        $lockFile = $config->get('cache/cacheDirectory') . '/index.log.lock';

        $diagnosis = $this->diagnose();
        if (!is_null($diagnosis)) {
            // Only log the first time; the lock file marks "already reported".
            if (!is_file($lockFile)) {
                $default->log->error(_kt('Indexer problem: ') . $diagnosis);
            }
            touch($lockFile);
            return false;
        }

        if ($simple) {
            return true;
        }

        $diagnosis = $this->diagnoseExtractors();
        if (!empty($diagnosis)) {
            if (!is_file($lockFile)) {
                foreach ($diagnosis as $diag) {
                    $default->log->error(sprintf(_kt('%s problem: %s'), $diag['name'],$diag['diagnosis']));
                }
            }
            touch($lockFile);
            return false;
        }

        if (is_file($lockFile)) {
            $default->log->info(_kt('Issues with the indexer have been resolved!'));
            unlink($lockFile);
        }

        return true;
    }

    /**
     * This does the initial mime type association between mime types and text extractors
     *
     * Runs once: it returns immediately when the 'mimeTypesRegistered' system
     * setting is already set, and sets it when done. In between it resets the
     * run time of all scheduler tasks to now, registers the types, deactivates
     * the extractors listed for the current operating system and reloads the
     * enabled-extractor list.
     */
    public function checkForRegisteredTypes()
    {
        global $default;

        // we are only doing this once!
        $initRegistered = KTUtil::getSystemSetting('mimeTypesRegistered', false);
        if ($initRegistered) {
            return;
        }

        if ($this->debug) {
            $default->log->debug('checkForRegisteredTypes: start');
        }

        $date = date('Y-m-d H:i');
        $sql = "UPDATE scheduler_tasks SET run_time='$date'";
        DBUtil::runQuery($sql);

        $this->registerTypes(true);

        $disable = array(
            'windows' => array('PSExtractor'),
            'unix' => array()
        );
        $disableForOS = OS_WINDOWS ? $disable['windows'] : $disable['unix'];

        if (!empty($disableForOS)) {
            $quotedNames = '\'' . implode("','", $disableForOS) .'\'';
            $sql = "UPDATE mime_extractors SET active=0 WHERE name in ($quotedNames)";
            DBUtil::runQuery($sql);

            // Log the names that were disabled (the old message used an undefined variable).
            $disabledNames = implode(', ', $disableForOS);
            $default->log->info("checkForRegisteredTypes: disabled '$disabledNames'");
        }

        $this->loadExtractorStatus();

        if ($this->debug) {
            $default->log->debug('checkForRegisteredTypes: done');
        }

        KTUtil::setSystemSetting('mimeTypesRegistered', true);
    }

    /**
     * Append a message to the indexing history and store the whole history as
     * the status message of the document's queue entry.
     *
     * @param int $documentId
     * @param string $message
     * @param string $level Label put in front of the message.
     */
    private function updatePendingDocumentStatus($documentId, $message, $level)
    {
        $this->indexingHistory .= "\n" . $level . ': ' . $message;

        // The id is embedded in the query, so force it to an integer first.
        $documentId = (int) $documentId;
        $message = sanitizeForSQL($this->indexingHistory);

        $sql = "UPDATE index_files SET status_msg='$message' WHERE document_id=$documentId";
        DBUtil::runQuery($sql);
    }

    // NOTE(review): the code that reads this flag is outside this section.
    private $restartCurrentBatch = false;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -