⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 class.crawler.php

📁 Typo3, 开源里边最强大的
💻 PHP
📖 第 1 页 / 共 3 页
字号:
			// Init:		$rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);		$fieldList = t3lib_div::trimExplode(',',$cfgRec['fieldlist'],1);		$languageField = $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['languageField'];		$sys_language_uid = $languageField ? $r[$languageField] : 0;			// (Re)-Indexing a row from a table:		$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');		parse_str(str_replace('###UID###',$r['uid'],$cfgRec['get_params']),$GETparams);		$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, $cfgRec['chashcalc'] ? TRUE : FALSE);		$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);		$indexerObj->forceIndexing = TRUE;		$theContent = '';		foreach($fieldList as $k => $v)	{			if (!$k)	{				$theTitle = $r[$v];			} else {				$theContent.= $r[$v].' ';			}		}			// Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid())		$indexerObj->backend_indexAsTYPO3Page(			strip_tags($theTitle),			'',			'',			strip_tags($theContent),			$GLOBALS['LANG']->charSet,	// Requires that			$r[$GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['tstamp']],			$r[$GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['crdate']],			$r['uid']		);		#echo print_r($indexerObj->internal_log);		#echo print_r($indexerObj->contentParts);	}	/**	 * Include indexer class.	 *	 * @return	void	 */	function loadIndexerClass()	{		global $TYPO3_CONF_VARS;		require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');	}	/**	 * Get rootline for closest TypoScript template root.	 * Algorithm same as used in Web > Template, Object browser	 *	 * @param	integer		The page id to traverse rootline back from	 * @return	array		Array where the root lines uid values are found.	 
*/
	function getUidRootLineForClosestTemplate($id)	{
			// NOTE(review): not read directly in this method. Presumably kept because files pulled in via
			// require_once execute in this function's scope and their trailing XCLASS check reads
			// $TYPO3_CONF_VARS (same pattern as at the bottom of this file) - confirm before removing.
		global $TYPO3_CONF_VARS;

		require_once (PATH_t3lib."class.t3lib_page.php");
		require_once (PATH_t3lib."class.t3lib_tstemplate.php");
		require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");

		$tmpl = t3lib_div::makeInstance("t3lib_tsparser_ext");
		$tmpl->tt_track = 0;	// Do not log time-performance information
		$tmpl->init();

			// Gets the rootLine
		$sys_page = t3lib_div::makeInstance("t3lib_pageSelect");
		$rootLine = $sys_page->getRootLine($id);
		$tmpl->runThroughTemplates($rootLine,0);	// This generates the constants/config + hierarchy info for the template.

			// Root line uids: keep the keys of the template object's rootline, reduce each entry to its uid
		$rootline_uids = array();
		foreach($tmpl->rootLine as $rlkey => $rldat)	{
			$rootline_uids[$rlkey] = $rldat['uid'];
		}

		return $rootline_uids;
	}

	/**
	 * Generate the unix time stamp for next visit.
	 *
	 * The result is "offset base + N * frequency", where N is the smallest whole number of
	 * frequency blocks that brings the value up to (or past) the current time.
	 *
	 * @param	array		Index configuration record; reads "timer_frequency", "timer_next_indexing" and "timer_offset"
	 * @return	integer		The next time stamp
	 */
	function generateNextIndexingTime($cfgRec)	{
		$currentTime = time();

			// Now, find a midnight time to use for offset calculation. This has to differ depending on whether
			// we have frequencies within a day or more than a day; Less than a day, we don't care which day to
			// use for offset, more than a day we want to respect the currently entered day as offset regardless
			// of when the script is run - thus the day-of-week used in case "Weekly" is selected will be respected
		if ($cfgRec['timer_frequency']<=24*3600)	{
				// Midnight of today minus 24 hours
			$aMidNight = mktime (0,0,0)-1*24*3600;
		} else {
			$lastTime = $cfgRec['timer_next_indexing']?$cfgRec['timer_next_indexing']:time();
				// Use the four-digit year ('Y'); the former two-digit 'y' only worked through mktime()'s
				// implicit remapping of two-digit years.
			$aMidNight = mktime (0,0,0, date('m',$lastTime), date('d',$lastTime), date('Y',$lastTime));
		}

			// Find last offset time plus frequency in seconds:
		$lastSureOffset = $aMidNight+t3lib_div::intInRange($cfgRec['timer_offset'],0,86400);
			// NOTE(review): the lower bound of 1 presumably protects the division below against a zero frequency - confirm intInRange() semantics
		$frequencySeconds = t3lib_div::intInRange($cfgRec['timer_frequency'],1);

			// Now, find out how many blocks of the length of frequency there is until the next time:
		$frequencyBlocksUntilNextTime = ceil(($currentTime-$lastSureOffset)/$frequencySeconds);

			// Set next time to the offset + the frequencyblocks multiplied with the frequency length in seconds.
		$nextTime = $lastSureOffset + $frequencyBlocksUntilNextTime*$frequencySeconds;

		return $nextTime;
	}

	/**
	 * Checks if $url has any of the URls in the $url_deny "list" in it and if so, returns true.
	 *
	 * @param	string		URL to test
	 * @param	string		String where URLs are separated by line-breaks; If any of these strings is the first part of $url, the function returns TRUE (to indicate denial of decend)
	 * @return	boolean		TRUE if there is a matching URL (hence, do not index!)
	 */
	function checkDeniedSuburls($url, $url_deny)	{
		if (trim($url_deny))	{
			$url_denyArray = t3lib_div::trimExplode(chr(10),$url_deny,1);
			foreach($url_denyArray as $testurl)	{
				if (t3lib_div::isFirstPartOfStr($url,$testurl))	{
						// NOTE(review): this prints the URL and the complete deny list (not just the matching entry)
						// to the output stream. It looks like leftover debug output; kept because existing
						// crawler logs may rely on it - confirm and remove if not.
					echo $url.' /// '.$url_deny.chr(10);
					return TRUE;
				}
			}
		}

		return FALSE;
	}

	/**
	 * Adding entry in queue for Hook
	 *
	 * @param	array		Configuration record; reads "uid", "set_id" and "pid"
	 * @param	string		Title/URL
	 * @return	void
	 */
	function addQueueEntryForHook($cfgRec, $title)	{

		$nparams = array(
			'indexConfigUid' => $cfgRec['uid'],		// This must ALWAYS be the cfgRec uid!
			'url' => $title,
			'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']')	// Also just for information. Its good style to show that its an indexing configuration that added the entry.
		);
		$this->pObj->addQueueEntry_callBack($cfgRec['set_id'],$nparams,$this->callBack,$cfgRec['pid']);
	}

	/**
	 * Deletes all data stored by indexed search for a given page
	 *
	 * @param	integer		Uid of the page to delete all pHash
	 * @return	void
	 */
	function deleteFromIndex($id)	{

			// Lookup old phash rows:
		$oldPhashRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash','index_section', 'page_id='.intval($id));

			// The is_array() check keeps count() from being called on a non-array value should the lookup fail.
		if (is_array($oldPhashRows) && count($oldPhashRows))	{
			$pHashesToDelete = array();
			foreach ($oldPhashRows as $pHashRow)	{
				$pHashesToDelete[] = $pHashRow['phash'];
			}

				// One IN() clause shared by all tables; each table listed below is filtered on its "phash" column.
			$where_clause = 'phash IN ('.implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($pHashesToDelete)).')';
			$tables = explode(',', 'index_debug,index_fulltext,index_grlist,index_phash,index_rel,index_section');
			foreach ($tables as $table)	{
				$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, $where_clause);
			}
		}
	}







	/*************************
	 *
	 * Hook functions for TCEmain (indexing of records)
	 *
	 *************************/

	/**
	 * TCEmain hook function, called before a command is processed.
	 * Removes the index data of a page when that page is deleted.
	 *
	 * @param	string		TCEmain command
	 * @param	string		Table name
	 * @param	string		Record ID. If new record its a string pointing to index inside t3lib_tcemain::substNEWwithIDs
	 * @param	mixed		Target value (ignored)
	 * @param	object		Reference to tcemain calling object
	 * @return	void
	 */
	function processCmdmap_preProcess($command, $table, $id, $value, &$pObj)	{

			// Clean up the index
		if ($command=='delete' && $table == 'pages')	{
			$this->deleteFromIndex($id);
		}
	}

	/**
	 * TCEmain hook function for on-the-fly indexing of database records
	 *
	 * @param	string		Status "new" or "update"
	 * @param	string		Table name
	 * @param	string		Record ID. If new record its a string pointing to index inside t3lib_tcemain::substNEWwithIDs
	 * @param	array		Field array of updated fields in the operation
	 * @param	object		Reference to tcemain calling object
	 * @return	void
	 */
	function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, &$pObj) {

			// Check if any fields are actually updated:
		if (count($fieldArray))	{

				// Translate new ids.
			if ($status=='new')	{
				$id = $pObj->substNEWwithIDs[$id];

			} elseif ($table=='pages' && $status=='update' && ((array_key_exists('hidden',$fieldArray) && $fieldArray['hidden']==1) || (array_key_exists('no_search',$fieldArray) && $fieldArray['no_search']==1)))	{

					// If the page should be hidden or not indexed after update, delete index for this page
				$this->deleteFromIndex($id);
			}

				// Get full record and if exists, search for indexing configurations:
			$currentRecord = t3lib_BEfunc::getRecord($table,$id);
			if (is_array($currentRecord))	{

					// Select all (not running) indexing configurations of type "record" (1) and which points to this table and is located on the same page as the record or pointing to the right source PID
				$indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
					'*',
					'index_config',
					'hidden=0
						AND (starttime=0 OR starttime<='.time().')
						AND set_id=0
						AND type=1
						AND table2index='.$GLOBALS['TYPO3_DB']->fullQuoteStr($table,'index_config').'
						AND (
								(alternative_source_pid=0 AND pid='.intval($currentRecord['pid']).')
								OR (alternative_source_pid='.intval($currentRecord['pid']).')
							)
						AND records_indexonchange=1
						'.t3lib_BEfunc::deleteClause('index_config')
				);

					// Only iterate when the lookup delivered an array, so a failed query does not end up in foreach.
				if (is_array($indexingConfigurations))	{
					foreach($indexingConfigurations as $cfgRec)	{
						$this->indexSingleRecord($currentRecord,$cfgRec);
					}
				}
			}
		}
	}
}


/**
 * Crawler hook for indexed search. Works with the "crawler" extension
 * This hook is specifically used to index external files found on pages through the crawler extension.
 *
 * @author	Kasper Skaarhoj <kasperYYYY@typo3.com>
 * @package TYPO3
 * @subpackage tx_indexedsearch
 * @see tx_indexedsearch_indexer::extractLinks()
 */
class tx_indexedsearch_files {

	/**
	 * Call back function for execution of a log element
	 *
	 * @param	array		Params from log element; reads "conf", "document" and "alturl"
	 * @param	object		Parent object (tx_crawler lib)
	 * @return	array		Result array. Nothing is returned when $params['conf'] is not an array.
	 */
	function crawler_execute($params,&$pObj)	{

			// Load indexer if not yet.
		$this->loadIndexerClass();

		if (is_array($params['conf']))	{

				// Initialize the indexer class:
			$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
			$indexerObj->conf = $params['conf'];
			$indexerObj->init();

				// Index document:
			if ($params['alturl'])	{
				$fI = pathinfo($params['document']);
					// pathinfo() omits the "extension" key for paths without an extension; fall back to an empty string.
				$ext = isset($fI['extension']) ? strtolower($fI['extension']) : '';
				$indexerObj->indexRegularDocument($params['alturl'], TRUE, $params['document'], $ext);
			} else {
				$indexerObj->indexRegularDocument($params['document'], TRUE);
			}

				// Return OK:
			return array('content' => array());
		}
	}

	/**
	 * Include indexer class.
	 *
	 * @return	void
	 */
	function loadIndexerClass()	{
			// NOTE(review): not read directly here. Presumably needed because the required file executes in this
			// function's scope and its XCLASS check reads $TYPO3_CONF_VARS - confirm before removing.
		global $TYPO3_CONF_VARS;
		require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
	}
}


	// Include an extension class file for this file if one is registered in the XCLASS configuration.
if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.crawler.php'])	{
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.crawler.php']);
}
?>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -