⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 class.crawler.php

📁 Typo3, 开源里边最强大的
💻 PHP
📖 第 1 页 / 共 3 页
字号:
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 2001-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
 * Crawler hook for indexed search. Works with the "crawler" extension
 *
 * @author	Kasper Skårhøj <kasperYYYY@typo3.com>
 */
/**
 * [CLASS/FUNCTION INDEX of SCRIPT]
 *
 *
 *
 *   87: class tx_indexedsearch_crawler
 *  106:     function crawler_init(&$pObj)
 *  219:     function crawler_execute($params,&$pObj)
 *  285:     function crawler_execute_type1($cfgRec,&$session_data,$params,&$pObj)
 *  345:     function crawler_execute_type2($cfgRec,&$session_data,$params,&$pObj)
 *  414:     function crawler_execute_type3($cfgRec,&$session_data,$params,&$pObj)
 *  458:     function crawler_execute_type4($cfgRec,&$session_data,$params,&$pObj)
 *  513:     function cleanUpOldRunningConfigurations()
 *
 *              SECTION: Helper functions
 *  579:     function checkUrl($url,$urlLog,$baseUrl)
 *  602:     function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
 *  645:     function indexSingleRecord($r,$cfgRec,$rl=NULL)
 *  694:     function loadIndexerClass()
 *  706:     function getUidRootLineForClosestTemplate($id)
 *  739:     function generateNextIndexingTime($cfgRec)
 *  778:     function checkDeniedSuburls($url, $url_deny)
 *  798:     function addQueueEntryForHook($cfgRec, $title)
 *
 *              SECTION: Hook functions for TCEmain (indexing of records)
 *  830:     function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, &$pObj)
 *
 *
 *  879: class tx_indexedsearch_files
 *  888:     function crawler_execute($params,&$pObj)
 *  913:     function loadIndexerClass()
 *
 * TOTAL FUNCTIONS: 18
 * (This index is automatically created/updated by the extension "extdeveval")
 *
 */


	// Bootstrap: the language object is needed so the backend charset is available.
	// Load its class file first, then create and initialise the object only if
	// no language object has been set up in $GLOBALS['LANG'] yet.
require_once(PATH_typo3.'sysext/lang/lang.php');

if (!is_object($GLOBALS['LANG']))	{
		// Instantiate the 'language' class and initialise it with the language
		// stored in the backend user's configuration ("uc"):
	$GLOBALS['LANG'] = t3lib_div::makeInstance('language');
	$GLOBALS['LANG']->init($GLOBALS['BE_USER']->uc['lang']);
}


/**
 * Crawler hook for indexed search. 
Works with the "crawler" extension
 *
 * @author	Kasper Skaarhoj <kasperYYYY@typo3.com>
 * @package TYPO3
 * @subpackage tx_indexedsearch
 */
class tx_indexedsearch_crawler {

		// Static:
	var $secondsPerExternalUrl = 3;		// Number of seconds to use as interval between queued indexing operations of URLs / files (types 2 & 3)

		// Internal, dynamic:
	var $instanceCounter = 0;		// Counts up for each added URL (type 3)

		// Internal, static:
	var $callBack = 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler';		// The object reference to this class.

	/**
	 * Initialization of crawler hook.
	 * This function is asked for each instance of the crawler and we must check if something is timed to happen and if so put entry(s) in the crawlers log to start processing.
	 * In reality we select the indexing configurations that are due (not hidden, start time reached, next indexing time passed, no set currently running),
	 * mark each of them as running with a fresh set-ID and add one start entry per configuration to the crawler queue.
	 * Afterwards old running configurations are cleaned up.
	 *
	 * @param	object		Parent object (tx_crawler lib)
	 * @return	void
	 */
	function crawler_init(&$pObj){

			// Read the clock once so that both time comparisons in the query use the same timestamp:
		$now = time();

			// Select all indexing configuration which are waiting to be activated:
		$indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_config',
			'hidden=0
				AND (starttime=0 OR starttime<='.$now.')
				AND timer_next_indexing<'.$now.'
				AND set_id=0
				'.t3lib_BEfunc::deleteClause('index_config')
		);

			// If the query did not yield an array (e.g. because it failed) there is nothing to start,
			// but the clean-up at the end must still run:
		if (!is_array($indexingConfigurations))	{
			$indexingConfigurations = array();
		}

			// For each configuration, check if it should be executed and if so, start:
		foreach($indexingConfigurations as $cfgRec)	{

				// Generate a unique set-ID:
			$setId = t3lib_div::md5int(microtime());

				// Get next time:
			$nextTime = $this->generateNextIndexingTime($cfgRec);

				// Start process by updating index-config record:
			$field_array = array (
				'set_id' => $setId,
				'timer_next_indexing' => $nextTime,
				'session_data' => '',
			);
			$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config','uid='.intval($cfgRec['uid']), $field_array);

				// Build the queue parameters based on configuration type.
				// $params stays NULL when no queue entry is to be added for this configuration:
			$params = NULL;
			switch($cfgRec['type'])	{
				case 1:	// RECORDS:
					$params = array(
						'indexConfigUid' => $cfgRec['uid'],
						'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),
						'url' => 'Records (start)',	// Just for show.
					);
				break;
				case 2:	// FILES:
					$params = array(
						'indexConfigUid' => $cfgRec['uid'],		// General
						'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),	// General
						'url' => $cfgRec['filepath'],	// Partly general... (for URL and file types)
						'depth' => 0	// Specific for URL and file types
					);
				break;
				case 3:	// External URL:
					$params = array(
						'indexConfigUid' => $cfgRec['uid'],		// General
						'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),	// General
						'url' => $cfgRec['externalUrl'],	// Partly general... (for URL and file types)
						'depth' => 0	// Specific for URL and file types
					);
				break;
				case 4:	// Page tree
					$params = array(
						'indexConfigUid' => $cfgRec['uid'],		// General
						'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),	// General
						'url' => intval($cfgRec['alternative_source_pid']),	// Partly general... (for URL and file types and page tree (root))
						'depth' => 0	// Specific for URL and file types and page tree
					);
				break;
				case 5:	// Meta configuration, nothing to do:
					# NOOP
				break;
				default:	// Custom type: handled by a hook object registered for this type, if any.
					$type = $cfgRec['type'];
						// isset() first, so an unregistered type does not raise an undefined-index notice:
					if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$type]) && $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$type])	{
						$hookObj = &t3lib_div::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$type]);
						if (is_object($hookObj))	{
								// $message was never defined before being handed to the hook; define it explicitly
								// (NULL is the value the hook received before) to avoid an undefined-variable notice:
							$message = NULL;
							$params = array(
								'indexConfigUid' => $cfgRec['uid'],		// General
								'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].'/CUSTOM]'),	// General
								'url' => $hookObj->initMessage($message),
							);
						}
					}
				break;
			}

				// Add the start entry to the crawler queue, with this class as call back:
			if (is_array($params))	{
				$pObj->addQueueEntry_callBack($setId,$params,$this->callBack,$cfgRec['pid']);
			}
		}

			// Finally, look up all old index configurations which are finished and needs to be reset and done.
		$this->cleanUpOldRunningConfigurations();
	}

	/**
	 * Call back function for execution of a log element
	 *
	 * @param	array		Params from log element. 
Must contain $params['indexConfigUid']
	 * @param	object		Parent object (tx_crawler lib)
	 * @return	array		Result array: the (possibly hook-modified) $params under the key 'log'
	 */
	function crawler_execute($params,&$pObj)	{

			// Without an indexing configuration uid there is nothing to execute:
		if (!$params['indexConfigUid'])	{
			return array('log' => $params);
		}

			// Fetch the indexing configuration record (first row of the result):
		list($cfgRec) = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_config',
			'uid='.intval($params['indexConfigUid'])
		);
		if (!is_array($cfgRec))	{
			return array('log' => $params);
		}

			// Session data is stored serialized in the record; the type handlers receive it by reference:
		$session_data = unserialize($cfgRec['session_data']);

			// Dispatch on the configuration type:
		switch($cfgRec['type'])	{
			case 1:	// Records:
				$this->crawler_execute_type1($cfgRec,$session_data,$params,$pObj);
			break;
			case 2:	// Files
				$this->crawler_execute_type2($cfgRec,$session_data,$params,$pObj);
			break;
			case 3:	// External URL:
				$this->crawler_execute_type3($cfgRec,$session_data,$params,$pObj);
			break;
			case 4:	// Page tree:
				$this->crawler_execute_type4($cfgRec,$session_data,$params,$pObj);
			break;
			case 5:	// Meta
				# NOOP (should never enter here!)
			break;
			default:	// Custom type: delegate to the hook object registered for this type, if any.
				if ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']])	{
					$hookObj = &t3lib_div::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);
					if (is_object($hookObj))	{
						$this->pObj = &$pObj;	// For addQueueEntryForHook()
						$hookObj->indexOperation($cfgRec,$session_data,$params,$this);
					}
				}
			break;
		}

			// Write the session data back, since the handler above may have changed it:
		$updateFields = array (
			'session_data' => serialize($session_data)
		);
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config','uid='.intval($cfgRec['uid']), $updateFields);

		return array('log' => $params);
	}

	/**
	 * Indexing records from a table
	 *
	 * @param	array		Indexing Configuration Record
	 * @param	array		Session data for the indexing session spread over multiple instances of the script. 
Passed by reference so changes hereto will be saved for the next call!	 * @param	array		Parameters from the log queue.	 * @param	object		Parent object (from "crawler" extension!)	 * @return	void	 */	function crawler_execute_type1($cfgRec,&$session_data,$params,&$pObj)	{		if ($cfgRec['table2index'] && isset($GLOBALS['TCA'][$cfgRec['table2index']]))	{				// Init session data array if not already:			if (!is_array($session_data))	{				$session_data = array(					'uid' => 0				);			}				// Init:			$pid = intval($cfgRec['alternative_source_pid']) ? intval($cfgRec['alternative_source_pid']) : $cfgRec['pid'];			$numberOfRecords = $cfgRec['recordsbatch'] ? t3lib_div::intInRange($cfgRec['recordsbatch'],1) : 100;				// Get root line:			$rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);				// Select			$recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(						'*',						$cfgRec['table2index'],						'pid = '.intval($pid).'							AND uid > '.intval($session_data['uid']).							t3lib_BEfunc::deleteClause($cfgRec['table2index']),						'',						'uid',						$numberOfRecords					);				// Traverse:			if (count($recs))	{				foreach($recs as $r)	{						// Index single record:					$this->indexSingleRecord($r,$cfgRec,$rl);						// Update the UID we last processed:					$session_data['uid'] = $r['uid'];				}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -