📄 class.external_parser.php
字号:
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 2001-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
 * External standard parsers for indexed_search
 *
 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
 * @coauthor Olivier Simah <noname_paris@yahoo.fr>
 */
/**
 * [CLASS/FUNCTION INDEX of SCRIPT]
 *
 *
 *
 *   75: class tx_indexed_search_extparse
 *   94:     function initParser($extension)
 *  214:     function softInit($extension)
 *  247:     function searchTypeMediaTitle($extension)
 *  323:     function isMultiplePageExtension($extension)
 *
 *              SECTION: Reading documents (for parsing)
 *  354:     function readFileContent($ext,$absFile,$cPKey)
 *  521:     function fileContentParts($ext,$absFile)
 *  560:     function splitPdfInfo($pdfInfoArray)
 *  579:     function removeEndJunk($string)
 *
 *              SECTION: Backend analyzer
 *  606:     function getIcon($extension)
 *
 * TOTAL FUNCTIONS: 9
 * (This index is automatically created/updated by the extension "extdeveval")
 *
 */
/**
 * External standard parsers for indexed_search
 * MUST RETURN utf-8 content!
 *
 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
 * @package TYPO3
 * @subpackage tx_indexedsearch
 */
class tx_indexed_search_extparse
{
    /**
     * PDF page grouping mode. This value is also overridden from config.
     *
     * zero: whole PDF file is indexed in one.
     * positive value: Indicates number of pages at a time, eg. "5" would means 1-5,6-10,....
     * Negative integer would indicate (abs value) number of groups.
     * Eg "3" groups of 10 pages would be 1-4,5-8,9-10
     */
    var $pdf_mode = -20;

    // The following arrays are configured in initialization (see initParser()):

    /** Map of tool name => full path of the external executable. */
    var $app = array();

    /** Map of file extension => item type (e.g. "htm" => "html"). */
    var $ext2itemtype_map = array();

    /** Map of file extension => TRUE for every extension initParser() accepted. */
    var $supportedExtensions = array();

    /** Reference to parent object (indexer class); used here for logging only. */
    var $pObj;

    /**
     * Initialize external parser for parsing content.
     *
     * Reads the serialized "indexed_search" extension configuration, refuses
     * extensions listed in "ignoreExtensions", and for extensions that need an
     * external tool checks that the tool is configured and present. On success
     * the tool paths are stored in $this->app and the extension is registered
     * in $this->supportedExtensions / $this->ext2itemtype_map. Failures are
     * reported through $this->pObj->log_setTSlogMessage().
     *
     * @param string File extension
     * @return boolean Returns true if extension is supported/enabled, otherwise false.
     */
    function initParser($extension)
    {
        // Read indexer-config. unserialize() yields FALSE for a missing or
        // corrupt value, so normalise to an array before indexing into it.
        $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
        if (!is_array($indexerConfig)) {
            $indexerConfig = array();
        }

        // If windows, apply extension to tool name:
        $exe = (TYPO3_OS == 'WIN') ? '.exe' : '';
        $extOK = FALSE;
        $mainExtension = '';

        // Ignore extensions
        $ignoreList = isset($indexerConfig['ignoreExtensions']) ? strtolower($indexerConfig['ignoreExtensions']) : '';
        $ignoreExtensions = t3lib_div::trimExplode(',', $ignoreList, 1);
        if (in_array($extension, $ignoreExtensions)) {
            $this->pObj->log_setTSlogMessage('Extension "'.$extension.'" was set to be ignored.', 1);
            return FALSE;
        }

        // Switch on file extension.
        // In every tool branch the configured directory is normalised to end in
        // exactly one "/" (a single trailing slash is stripped, then one is added).
        // When PHP's safe_mode is on, the is_file() existence check is skipped and
        // the tool is assumed to be present.
        switch ($extension) {
            case 'pdf':
                // PDF (needs both pdftotext and pdfinfo)
                if (!empty($indexerConfig['pdftools'])) {
                    $pdfPath = preg_replace('/\/$/', '', $indexerConfig['pdftools']).'/';
                    if (ini_get('safe_mode') || (@is_file($pdfPath.'pdftotext'.$exe) && @is_file($pdfPath.'pdfinfo'.$exe))) {
                        $this->app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe;
                        $this->app['pdftotext'] = $pdfPath.'pdftotext'.$exe;
                        // PDF mode (an unset value is treated as 0, like before):
                        $this->pdf_mode = t3lib_div::intInRange(
                            isset($indexerConfig['pdf_mode']) ? $indexerConfig['pdf_mode'] : 0,
                            -100,
                            100
                        );
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("PDF tools was not found in paths '".$pdfPath."pdftotext' and/or '".$pdfPath."pdfinfo'", 3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('PDF tools disabled', 1);
                }
                break;

            case 'doc':
                // Catdoc
                if (!empty($indexerConfig['catdoc'])) {
                    $catdocPath = preg_replace('/\/$/', '', $indexerConfig['catdoc']).'/';
                    if (ini_get('safe_mode') || @is_file($catdocPath.'catdoc'.$exe)) {
                        $this->app['catdoc'] = $catdocPath.'catdoc'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'catdoc' tool for reading Word-files was not found in path '".$catdocPath."catdoc'", 3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('catdoc tools (Word-files) disabled', 1);
                }
                break;

            case 'pps':    // MS PowerPoint(?)
            case 'ppt':    // MS PowerPoint
                // ppthtml
                if (!empty($indexerConfig['ppthtml'])) {
                    $ppthtmlPath = preg_replace('/\/$/', '', $indexerConfig['ppthtml']).'/';
                    if (ini_get('safe_mode') || @is_file($ppthtmlPath.'ppthtml'.$exe)) {
                        $this->app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'ppthtml' tool for reading Powerpoint-files was not found in path '".$ppthtmlPath."ppthtml'", 3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('ppthtml tools (Powerpoint-files) disabled', 1);
                }
                break;

            case 'xls':    // MS Excel
                // Xlhtml
                if (!empty($indexerConfig['xlhtml'])) {
                    $xlhtmlPath = preg_replace('/\/$/', '', $indexerConfig['xlhtml']).'/';
                    if (ini_get('safe_mode') || @is_file($xlhtmlPath.'xlhtml'.$exe)) {
                        $this->app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'xlhtml' tool for reading Excel-files was not found in path '".$xlhtmlPath."xlhtml'", 3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('xlhtml tools (Excel-files) disabled', 1);
                }
                break;

            case 'sxc':    // Open Office Calc.
            case 'sxi':    // Open Office Impress
            case 'sxw':    // Open Office Writer
            case 'ods':    // Oasis OpenDocument Spreadsheet
            case 'odp':    // Oasis OpenDocument Presentation
            case 'odt':    // Oasis OpenDocument Text
                if (!empty($indexerConfig['unzip'])) {
                    $unzipPath = preg_replace('/\/$/', '', $indexerConfig['unzip']).'/';
                    if (ini_get('safe_mode') || @is_file($unzipPath.'unzip'.$exe)) {
                        $this->app['unzip'] = $unzipPath.'unzip'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'unzip' tool for reading OpenOffice.org-files was not found in path '".$unzipPath."unzip'", 3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('unzip tool (OpenOffice.org-files) disabled', 1);
                }
                break;

            case 'rtf':
                // unrtf
                if (!empty($indexerConfig['unrtf'])) {
                    $unrtfPath = preg_replace('/\/$/', '', $indexerConfig['unrtf']).'/';
                    if (ini_get('safe_mode') || @is_file($unrtfPath.'unrtf'.$exe)) {
                        $this->app['unrtf'] = $unrtfPath.'unrtf'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'unrtf' tool for reading RTF-files was not found in path '".$unrtfPath."unrtf'", 3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('unrtf tool (RTF-files) disabled', 1);
                }
                break;

            case 'txt':    // Raw text
            case 'csv':    // Raw text
            case 'xml':    // PHP strip-tags()
            case 'tif':    // PHP EXIF
                $extOK = TRUE;
                break;

            case 'html':   // PHP strip-tags()
            case 'htm':    // PHP strip-tags()
                $extOK = TRUE;
                $mainExtension = 'html';    // making "html" the common "item_type"
                break;

            case 'jpg':    // PHP EXIF
            case 'jpeg':   // PHP EXIF
                $extOK = TRUE;
                $mainExtension = 'jpeg';    // making "jpeg" the common item_type
                break;
        }

        // If extension was OK, register it and its item type:
        if ($extOK) {
            $this->supportedExtensions[$extension] = TRUE;
            $this->ext2itemtype_map[$extension] = $mainExtension ? $mainExtension : $extension;
            return TRUE;
        }

        // Unknown extension, or its tool is disabled/missing.
        return FALSE;
    }

    /**
     * Initialize external parser for backend modules
     * Doesn't evaluate if parser is configured right - more like returning POSSIBLE
     * supported extensions (for showing icons etc) in backend and frontend plugin
     *
     * @param string File extension to initialize for.
     * @return boolean Returns true if the extension is one of the known types, otherwise false.
     */
    function softInit($extension)
    {
        switch ($extension) {
            case 'pdf':    // PDF
            case 'doc':    // MS Word files
            case 'pps':    // MS PowerPoint
            case 'ppt':    // MS PowerPoint
            case 'xls':    // MS Excel
            case 'sxc':    // Open Office Calc.
            case 'sxi':    // Open Office Impress
            case 'sxw':    // Open Office Writer
            case 'ods':    // Oasis OpenDocument Spreadsheet
            case 'odp':    // Oasis OpenDocument Presentation
            case 'odt':    // Oasis OpenDocument Text
            case 'rtf':    // RTF documents
            case 'txt':    // ASCII Text documents
            case 'html':   // HTML
            case 'htm':    // HTML
            case 'csv':    // Comma Separated Values
            case 'xml':    // Generic XML
            case 'jpg':    // Jpeg images (EXIF comment)
            case 'jpeg':   // Jpeg images (EXIF comment)
            case 'tif':    // TIF images (EXIF comment)
                return TRUE;
        }

        return FALSE;
    }

    /**
     * Return title of entry in media type selector box.
     *
     * Tool-backed types only get a title when their tool is enabled in the
     * indexer configuration. "htm" and "jpg" deliberately get no entry of their
     * own because "html" and "jpeg" already provide one.
     *
     * @param string File extension
     * @return mixed String with label value of entry in media type search selector box (frontend plugin), or false when there is no entry for the extension.
     */
    function searchTypeMediaTitle($extension)
    {
        // Read indexer-config (normalised to an array, see initParser()):
        $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
        if (!is_array($indexerConfig)) {
            $indexerConfig = array();
        }

        // Ignore extensions
        $ignoreList = isset($indexerConfig['ignoreExtensions']) ? strtolower($indexerConfig['ignoreExtensions']) : '';
        $ignoreExtensions = t3lib_div::trimExplode(',', $ignoreList, 1);
        if (in_array($extension, $ignoreExtensions)) {
            return FALSE;
        }

        // Switch on file extension:
        switch ($extension) {
            case 'pdf':
                // PDF
                if (!empty($indexerConfig['pdftools'])) {
                    return 'PDF';
                }
                break;

            case 'doc':
                // Catdoc
                if (!empty($indexerConfig['catdoc'])) {
                    return 'MS Word';
                }
                break;

            case 'pps':    // MS PowerPoint(?)
            case 'ppt':    // MS PowerPoint
                // ppthtml
                if (!empty($indexerConfig['ppthtml'])) {
                    return 'MS Powerpoint';
                }
                break;

            case 'xls':    // MS Excel
                // Xlhtml
                if (!empty($indexerConfig['xlhtml'])) {
                    return 'MS Excel';
                }
                break;

            case 'sxc':    // Open Office Calc.
            case 'sxi':    // Open Office Impress
            case 'sxw':    // Open Office Writer
            case 'ods':    // Oasis OpenDocument Spreadsheet
            case 'odp':    // Oasis OpenDocument Presentation
            case 'odt':    // Oasis OpenDocument Text
                if (!empty($indexerConfig['unzip'])) {
                    return 'Open Office';
                }
                break;

            case 'rtf':
                // unrtf
                if (!empty($indexerConfig['unrtf'])) {
                    return 'RTF';
                }
                break;

            case 'html':   // PHP strip-tags()
            case 'jpeg':   // PHP EXIF
            case 'txt':    // Raw text
            case 'csv':    // Raw text
            case 'xml':    // PHP strip-tags()
            case 'tif':    // PHP EXIF
                return strtoupper($extension);

            // NO entry (duplicates or blank):
            case 'htm':    // PHP strip-tags()
            case 'jpg':    // PHP EXIF
            default:
                break;
        }

        // No selector entry for this extension (unknown, duplicate, or tool disabled).
        return FALSE;
    }

    // NOTE(review): this copy of the file ends here. The function index at the top
    // lists further methods (isMultiplePageExtension, readFileContent,
    // fileContentParts, splitPdfInfo, removeEndJunk, getIcon) that are not present;
    // restore them from the original distribution before relying on this class.
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -