📄 extractorcore.inc.php.svn-base

📁 PHP 知识管理系统（基于树结构的知识管理系统）, 英文原版的PHP源码。
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
上一页 12
	 * @return string
	 */
	protected function getCommandLine()
	{
		throw new Exception(_kt('getCommandLine is not implemented'));
	}

	/**
	 * Runs the command line produced by getCommandLine().
	 * The command line is logged at debug level before it is executed.
	 * Returns true if success.
	 *
	 * @return boolean
	 */
	public function extractTextContent()
	{
		global $default;

		$cmdline = $this->getCommandLine();

		$class = get_class($this);
		$default->log->debug("$class: "  . $cmdline);

		return $this->exec($cmdline);
	}
}

/**
 * An external extractor that first tries a configured command line tool and,
 * if that tool is unavailable or fails, falls back to the StarOffice/OpenOffice
 * extractor when 'indexer/useOpenOffice' is enabled.
 *
 * NOTE(review): $useOO is assigned in the constructor but not declared in the
 * part of the file visible here. If no parent class declares it, it is a dynamic
 * property (deprecated as of PHP 8.2) - confirm and declare it where appropriate.
 */
abstract class OOFallbackDocumentExtractor extends ExternalDocumentExtractor
{
	/**
	 * Resolved path of the external binary, or false when it must not be used.
	 *
	 * @var mixed
	 */
	protected $cmd;

	/**
	 * Command line template for the external binary.
	 *
	 * @var string
	 */
	protected $params;

	/**
	 * The fallback extractor. Only instantiated when OpenOffice is in use.
	 *
	 * @var StarOfficeExtractor
	 */
	protected $oo;

	/**
	 * Resolve the external command and set up the OpenOffice fallback.
	 *
	 * @param string $cmd The command name, used to build the config keys.
	 * @param string $params Default command line template when none is configured.
	 */
	public function __construct($cmd, $params)
	{
		parent::__construct();

		$this->cmd = KTUtil::findCommand('externalBinary/' . $cmd, false);

		$config = KTConfig::getSingleton();

		$this->params = $config->get('indexer/' . $cmd . 'cmdline', $params);
		$this->useOO = $config->get('indexer/useOpenOffice', true);

		// The external command is never used on Windows or when disabled in the config.
		if (!$config->get('indexer/use_' . $cmd, true) || OS_WINDOWS)
		{
			$this->cmd = false;
		}

		if ($this->useOO)
		{
			require_once('extractors/StarOfficeExtractor.inc.php');

			$this->oo = new StarOfficeExtractor();
		}
	}

	/**
	 * An intermediate source file is required whenever OpenOffice is in use.
	 *
	 * @return boolean
	 */
	public function needsIntermediateSourceFile()
	{
		// we need the intermediate file because it
		// has the correct extension. documentConverter uses the extension to determine mimetype
		return ($this->useOO);
	}

	/**
	 * Expands the configured command line template.
	 *
	 * @return string
	 */
	protected function getCommandLine()
	{
		// These locals are in scope for the eval() below; presumably the configured
		// template refers to them by name, so they must not be removed or renamed.
		$sourcefile = $this->sourcefile;
		$targetfile = $this->targetfile;
		$escape = '"';
		$cmd = $this->cmd;

		$cmdline = $this->params;

		// NOTE(review): eval() expands a template that comes from configuration.
		// Anyone able to edit 'indexer/<cmd>cmdline' can run arbitrary PHP here.
		// Consider replacing this with explicit placeholder substitution.
		$cmdline = eval("return \"$cmdline\";");

		// Normalise backslashes to forward slashes.
		$cmdline = str_replace('\\','/',$cmdline);

		return $cmdline;
	}

	/**
	 * Try the external command first, then fall back to OpenOffice.
	 * When neither is available an empty target file is written and true is returned.
	 *
	 * @return boolean
	 */
	public function extractTextContent()
	{
		if ($this->cmd !== false)
		{
			// so we have catppt or something

			$result = parent::extractTextContent();
			if ($result !== false)
			{
				// if it returns true, we can bail
				return true;
			}

			// if failure, fallthrough, and attempt OO
		}

		if ($this->useOO)
		{
			// Hand the current state over to the fallback extractor...
			$this->oo->setSourceFile($this->sourcefile);
			$this->oo->setMimeType($this->mimetype);
			$this->oo->setExtension($this->extension);
			$this->oo->setTargetFile($this->targetfile);
			$this->oo->setDocument($this->document);
			$this->oo->setIndexingStatus(null);
			$this->oo->setExtractionStatus(null);

			$result = $this->oo->extractTextContent();

			// ...and copy its outcome back onto this extractor.
			$this->setIndexingStatus($this->oo->getIndexingStatus());
			$this->setExtractionStatus($this->oo->getExtractionStatus());
			$this->setTargetFile($this->oo->getTargetFile());

			return $result;
		}
		else
		{
			global $default;

			$docId = $this->document->getId();
			$cmd = $this->cmd;

			$default->log->info("The document {$docId} cannot be indexed as {$cmd} is not available and OpenOffice is not in use.");

			file_put_contents($this->targetfile, '');

			return true;
		}
	}

	/**
	 * Diagnose the fallback extractor when it is the only option.
	 *
	 * @return mixed
	 */
	public function diagnose()
	{
		if ($this->cmd !== false || !$this->useOO)
		{
			// cmd is found. we don't care about oo.
			// if we can't use oo, well, not much we can do....
			return null;
		}

		return $this->oo->diagnose();
	}
}

/**
 * An extension to the external document extractor. A derived class simply needs
 * to implement a constructor and getSupportedMimeTypes().
 *
 */
abstract class ApplicationExtractor extends ExternalDocumentExtractor
{
	/**
	 * The full path to the application that will be run. This will be resolved from
	 * the path or using the config file.
	 *
	 * @var string
	 */
	private $application;

	/**
	 * The command name of the application that can be run.
	 *
	 * @var string
	 */
	private $command;

	/**
	 * This is the friendly name for the extractor.
	 *
	 * @var string
	 */
	private $displayname;

	/**
	 * The command line parameters for the application.
	 * This may include {source} and {target} where substitutions will be done.
	 *
	 * @var string
	 */
	private $params;

	/**
	 * Initialise the extractor.
	 *
	 * @param string $section The section in the config file.
	 * @param string $appname The application name in the config file.
	 * @param string $command The command that can be run.
	 * @param string $displayname
	 * @param string $params
	 */
	public function __construct($section, $appname, $command, $displayname, $params)
	{
		parent::__construct();

		$this->application = KTUtil::findCommand("$section/$appname", $command);
		$this->command = $command;
		$this->displayname = $displayname;
		$this->params = $params;
	}

	/**
	 * Return the display name.
	 *
	 * @return string
	 */
	public function getDisplayName()
	{
		return _kt($this->displayname);
	}

	/**
	 * Returns the command line after performing substitutions.
	 * The application path is wrapped in quotes; {source} and {target} in the
	 * parameters are replaced with the source and target file names as they are.
	 *
	 * @return string
	 */
	protected function getCommandLine()
	{
		$sources = array('{source}','{target}');
		$target = array($this->sourcefile, $this->targetfile);

		$escape = OS_WINDOWS?'"':'\'';

		$cmdline = $escape . $this->application . $escape . ' ' . str_replace($sources,$target, $this->params);

		return $cmdline;
	}

	/**
	 * Identifies if there are any circumstances why the command can not run that could result in the text extraction process
	 * failing.
	 *
	 * @return mixed Returns string if there is a problem, null otherwise.
	 */
	public function diagnose()
	{
		if (false === $this->application)
		{
			return sprintf(_kt("Cannot locate binary for %s (%s)."), $this->displayname, $this->command);
		}

		return null;
	}
}

abstract class TextExtractor extends DocumentExtractor
{
	/**
	 * This extracts the text from the document.
	 * At most 'indexer/maxTextSize' bytes of the source file are read, passed
	 * through filter() and written to the target file.
	 *
	 * @return boolean
	 */
	public function extractTextContent()
	{
		$config = KTConfig::getSingleton();
		$maxTextSize = $config->get('indexer/maxTextSize', 1024 * 1024 * 10); // we'll only take 10 meg by default

		// Pass false / 0 rather than null for $use_include_path and $offset:
		// passing null to these non-nullable parameters is deprecated as of PHP 8.1.
		$content = file_get_contents($this->sourcefile, false, null, 0, $maxTextSize);
		if (false === $content)
		{
			return false;
		}

		$result = file_put_contents($this->targetfile, $this->filter($content));

		return false !== $result;
	}

	/**
	 * There are no external dependencies to diagnose.
	 *
	 * @return null
	 */
	public function diagnose()
	{
		return null;
	}
}

/**
 * The composite extractor implies that a conversion is done to an intermediate form before another extractor is run.
 *
 */
abstract class CompositeExtractor extends DocumentExtractor
{
	/**
	 * The initial extractor
	 *
	 * @var DocumentExtractor
	 */
	private $sourceExtractor;

	/**
	 * The text extractor
	 *
	 * @var DocumentExtractor
	 */
	private $targetExtractor;

	/**
	 * The extension for the initial extraction
	 *
	 * @var string
	 */
	private $targetExtension;

	/**
	 * The mime type of the initial extraction.
	 *
	 * @var string
	 */
	private $targetMimeType;

	/**
	 * Whether this extractor needs an intermediate source file.
	 *
	 * @var boolean
	 */
	private $needsIntermediate;

	/**
	 * Initialise the composite extractor.
	 *
	 * @param DocumentExtractor $sourceExtractor Converts the document to the intermediate form.
	 * @param string $targetExtension Extension of the intermediate file.
	 * @param string $targetMimeType Mime type of the intermediate file.
	 * @param DocumentExtractor $targetExtractor Extracts text from the intermediate file.
	 * @param boolean $needsIntermediate Value reported by needsIntermediateSourceFile().
	 */
	public function __construct($sourceExtractor, $targetExtension, $targetMimeType, $targetExtractor, $needsIntermediate)
	{
		$this->sourceExtractor = $sourceExtractor;
		$this->targetExtractor = $targetExtractor;
		$this->targetExtension = $targetExtension;
		$this->targetMimeType = $targetMimeType;

		// Previously the flag was passed to needsIntermediateSourceFile(). The only
		// implementation of that method visible in this file takes no arguments, so
		// the value was presumably discarded. Keep it and report it via the override below.
		// NOTE(review): confirm the parent signature is needsIntermediateSourceFile().
		$this->needsIntermediate = $needsIntermediate;
	}

	/**
	 * Reports the flag that was passed to the constructor.
	 *
	 * @return boolean
	 */
	public function needsIntermediateSourceFile()
	{
		return $this->needsIntermediate;
	}

	/**
	 * Extracts the content of the document.
	 * The source extractor writes an intermediate file, which the target
	 * extractor then converts into the final target file.
	 *
	 * @return boolean
	 */
	public function extractTextContent()
	{
		$placeholderFile = $this->targetfile . '.' . $this->targetExtension;

		touch($placeholderFile);

		$this->sourceExtractor->setSourceFile($this->sourcefile);
		$this->sourceExtractor->setTargetFile($placeholderFile);
		$this->sourceExtractor->setDocument($this->getDocument());
		$this->sourceExtractor->setMimeType($this->mimetype);
		$this->sourceExtractor->setExtension($this->extension);
		if (!$this->sourceExtractor->extractTextContent())
		{
			$this->output = $this->sourceExtractor->output;
			@unlink($placeholderFile);
			return false;
		}

		// The source extractor may have redirected its output to another file.
		$intermediateFile = $this->sourceExtractor->getTargetFile();
		if ($intermediateFile !== $placeholderFile)
		{
			// The placeholder created above is no longer referenced; do not leave it behind.
			@unlink($placeholderFile);
		}

		$this->targetExtractor->setSourceFile($intermediateFile);
		$this->targetExtractor->setTargetFile($this->targetfile);
		$this->targetExtractor->setDocument($this->getDocument());
		$this->targetExtractor->setMimeType($this->targetMimeType);
		$this->targetExtractor->setExtension($this->targetExtension);
		$result = $this->targetExtractor->extractTextContent();
		if (!$result)
		{
			$this->output = $this->targetExtractor->output;
		}

		@unlink($intermediateFile);

		$this->setTargetFile($this->targetExtractor->getTargetFile());

		return $result;
	}

	/**
	 * Diagnose the extractors.
	 * The first non-empty diagnosis is returned, source extractor first.
	 *
	 * @return mixed
	 */
	public function diagnose()
	{
		$diagnosis = $this->sourceExtractor->diagnose();
		if (!empty($diagnosis))
		{
			return $diagnosis;
		}

		$diagnosis = $this->targetExtractor->diagnose();
		if (!empty($diagnosis))
		{
			return $diagnosis;
		}

		return null;
	}
}

/**
 * An extractor hook provides callbacks that are invoked before and after
 * extraction and before and after indexing, for the mime types it supports.
 *
 */
abstract class ExtractorHook
{
	/**
	 * Returns an array of supported mime types.
	 * e.g. return array('text/plain');
	 *
	 *
	 * @return array
	 *
	 */
	public abstract function getSupportedMimeTypes();

	/**
	 * Returns the friendly name for the hook.
	 *
	 * @return string
	 */
	public abstract function getDisplayName();

	/**
	 * This does a basic diagnosis on the hook.
	 *
	 * @return string
	 */
	public function diagnose()
	{
		return null;
	}

	/**
	 * Perform any pre extraction activities.
	 *
	 * @param DocumentExtractor $extractor
	 */
	public function pre_extract($extractor)
	{
	}

	/**
	 * Perform any post extraction activities.
	 *
	 * @param DocumentExtractor $extractor
	 */
	public function post_extract($extractor)
	{
	}

	/**
	 * Perform any pre indexing activities.
	 *
	 * @param DocumentExtractor $extractor
	 */
	public function pre_index($extractor)
	{
	}

	/**
	 * Perform any post indexing activities.
	 *
	 * @param DocumentExtractor $extractor
	 */
	public function post_index($extractor)
	{
	}
}

?>
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -