⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 class.indexer.php

📁 Typo3, 开源里边最强大的
💻 PHP
📖 第 1 页 / 共 5 页
字号:
	 * @return	void
	 */
	function removeOldIndexedPages($phash)	{
			// Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here.
		$tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug');
		foreach($tableArr as $table)	{
			$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
		}

			// Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file).
		$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
	}

	/********************************
	 *
	 * SQL; External media
	 *
	 *******************************/

	/**
	 * Stores the index records for an external file: replaces the index_phash
	 * and index_fulltext rows for the file's phash and, when debug mode is
	 * enabled in the indexer configuration, writes an index_debug row as well.
	 *
	 * @param	array		Array with phash and phash_grouping keys for file
	 * @param	string		File name
	 * @param	array		Array of "cHashParams" for files: This is for instance the page index for a PDF file (other document types it will be a zero)
	 * @param	string		File extension determining the type of media.
	 * @param	integer		Modification time of file.
	 * @param	integer		Creation time of file.
	 * @param	integer		Size of file in bytes
	 * @param	integer		Content HASH value.
	 * @param	array		Standard content array (using only title and body for a file)
	 * @return	void
	 */
	function submitFilePage($hash,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts)	{

			// Find item type: use the mapping offered by the external parser registered
			// for this extension; fall back to the extension itself when there is no
			// parser object or the mapping is empty.
		$storeItemType = $ext;
		if (isset($this->external_parsers[$ext])
				&& is_object($this->external_parsers[$ext])
				&& !empty($this->external_parsers[$ext]->ext2itemtype_map[$ext]))	{
			$storeItemType = $this->external_parsers[$ext]->ext2itemtype_map[$ext];
		}

			// Remove any current data for this phash:
		$this->removeOldIndexedFiles($hash['phash']);

			// Split filename. parse_url() returns no "scheme" key for plain local paths
			// and may return FALSE for malformed input, so the key is tested with empty() below.
		$fileParts = parse_url($file);

			// Setting new
		$fields = array(
			'phash' => $hash['phash'],
			'phash_grouping' => $hash['phash_grouping'],
			'cHashParams' => serialize($subinfo),
			'contentHash' => $content_md5h,
			'data_filename' => $file,
			'item_type' => $storeItemType,
			'item_title' => trim($contentParts['title']) ? $contentParts['title'] : basename($file),
			'item_description' => $this->bodyDescription($contentParts),
			'item_mtime' => $mtime,
			'item_size' => $size,
			'item_crdate' => $ctime,
			'tstamp' => time(),
			'crdate' => time(),
			'gr_list' => $this->conf['gr_list'],
			'externalUrl' => !empty($fileParts['scheme']) ? 1 : 0,
			'recordUid' => intval($this->conf['recordUid']),
			'freeIndexUid' => intval($this->conf['freeIndexUid']),
			'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
		);
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);

			// PROCESSING index_fulltext
		$fields = array(
			'phash' => $hash['phash'],
			'fulltextdata' => implode(' ', $contentParts)
		);
		if ($this->indexerConfig['fullTextDataLength']>0)	{
				// NOTE(review): substr() counts bytes, so a multi-byte character at the cut position may be split - confirm whether that matters for the configured charset.
			$fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
		}
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);

			// PROCESSING index_debug
		if ($this->indexerConfig['debugMode'])	{
			$fields = array(
				'phash' => $hash['phash'],
				'debuginfo' => serialize(array(
						'cHashParams' => $subinfo,
							// Only the first 1000 bytes of the body are kept in the debug record.
						'contentParts' => array_merge($contentParts,array('body' => substr($contentParts['body'],0,1000))),
						'logs' => $this->internal_log,
						'lexer' => $this->lexerObj->debugString,
					))
			);
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
		}
	}

	/**
	 * Stores file gr_list for a file IF it does not exist already
	 *
	 * @param	integer		phash value of file
	 * @return	void
	 */
	function submitFile_grlist($hash)	{

			// Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one.
			// A record for either the default gr_list or the currently configured gr_list counts as "already there".
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')');
		if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
			$this->submit_grlist($hash,$hash);
		}
	}

	/**
	 * Stores file section for a file IF it does not exist
	 *
	 * @param	integer		phash value of file
	 * @return	void
	 */
	function submitFile_section($hash)	{

			// Testing if there is a section for this file on the page currently being indexed ($this->conf['id']):
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']));
		if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
				// Second argument is the phash held in $this->hash, not the file's own phash.
			$this->submit_section($hash,$this->hash['phash']);
		}
	}

	/**
	 * Removes records for the indexed file, $phash, from index_phash,
	 * index_grlist, index_fulltext and index_debug.
	 * index_section is not part of the table list and is left untouched here.
	 *
	 * @param	integer		phash value to flush
	 * @return	void
	 */
	function removeOldIndexedFiles($phash)	{

			// Removing old registrations for tables.
		$tableArr = explode(',','index_phash,index_grlist,index_fulltext,index_debug');
		foreach($tableArr as $table)	{
			$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
		}
	}

	/********************************
	 *
	 * SQL Helper functions
	 *
	 *******************************/

	/**
	 * Check the mtime / tstamp of the currently indexed page/file (based on phash)
	 * Return positive integer if the page needs to be indexed
	 *
	 * Result codes (see $this->reasons):
	 *   -2  Min age was NOT exceeded and so indexing cannot occur.
	 *   -1  mtime matched so no need to reindex page. As a side effect the tstamp
	 *       of the index_phash row is refreshed, unless a max age is configured.
	 *    0  N/A
	 *    1  Max age exceeded, page must be indexed again.
	 *    2  mtime of indexed page doesn't match mtime given for current content and we must index page.
	 *    3  No mtime was set, so we will index...
	 *    4  No indexed page found, so of course we will index.
	 *
	 * @param	integer		mtime value to test against limits and indexed page (usually this is the mtime of the cached document)
	 * @param	integer		"phash" used to select any already indexed page to see what its mtime is.
	 * @return	integer		Result integer: Generally: <0 = No indexing, >0 = Do indexing
	 */
	function checkMtimeTstamp($mtime,$phash)	{

			// Select indexed page:
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
		$out = 0;

		if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
				// There was an indexing of the page...
			if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time())	{
					// The configured max-age was exceeded for the document and thus it's indexed.
				$out = 1;
			} elseif (!$this->tstamp_minAge || ($row['tstamp']+$this->tstamp_minAge)<time())	{
					// minAge is not set or minAge is exceeded, so look at mtime.
				if ($mtime)	{
					if ($row['item_mtime'] != $mtime)	{
							// mtime differs from the index_phash mtime, so it's about time to re-index.
						$out = 2;
					} else {
							// mtime matched the document, so no changes detected and no content updated.
						$out = -1;
						if ($this->tstamp_maxAge)	{
							$this->log_setTSlogMessage('mtime matched, timestamp NOT updated because a maxAge is set ('.($row['tstamp'] + $this->tstamp_maxAge - time()).' seconds to expire time).',1);
						} else {
								// Update the timestamp
							$this->updateTstamp($phash);
							$this->log_setTSlogMessage('mtime matched, timestamp updated.',1);
						}
					}
				} else {
						// The minimum age was exceeded, but mtime was not set, so the page must be indexed.
					$out = 3;
				}
			} else {
					// The minimum age was not exceeded
				$out = -2;
			}
		} else {
				// Page has never been indexed (is not represented in the index_phash table).
			$out = 4;
		}

		return $out;
	}

	/**
	 * Check content hash in phash table
	 *
	 * @return	mixed		Returns 1 if the page needs to be indexed (that is, there was no result), otherwise the matching row (an array holding the phash value) of the phash record to which the grlist_record should be related!
	 */
	function checkContentHash()	{
			// With this query the page will only be indexed if its content is different from the same "phash_grouping" -page.
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h));
		if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
			return $row;
		}
		return 1;
	}

	/**
	 * Check content hash for external documents
	 * Returns true if the document needs to be indexed (that is, there was no result)
	 *
	 * @param	integer		phash value to check (phash_grouping)
	 * @param	integer		Content hash to check
	 * @return	integer		1 if the document needs to be indexed (no row with this grouping and content hash exists), otherwise 0
	 */
	function checkExternalDocContentHash($hashGr,$content_md5h)	{
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h));

			// Only the existence of a row matters; its content is not used.
		return $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res) ? 0 : 1;
	}

	/**
	 * Checks if a grlist record has been set for the phash value input (looking at the "real" phash of the current content, not the linked-to phash of the common search result page)
	 *
	 * @param	integer		Phash integer to test.
	 * @return	integer		Number of index_grlist rows found for the given phash_x (zero when none is set)
	 */
	function is_grlist_set($phash_x)	{
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x));
		return $GLOBALS['TYPO3_DB']->sql_num_rows($res);
	}

	/**
	 * Check if an grlist-entry for this hash exists and if not so, write one.
	 *
	 * @param	integer		phash of the search result that should be found
	 * @param	integer		The real phash of the current content. The two values are different when a page with userlogin turns out to contain the exact same content as another already indexed version of the page; This is the whole reason for the grlist table in fact...
	 * @return	void
	 * @see submit_grlist()
	 */
	function update_grlist($phash,$phash_x)	{
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']));
		if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
			$this->submit_grlist($phash,$phash_x);
			$this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
		}
	}

	/**
	 * Update tstamp for a phash row.
	 *
	 * @param	integer		phash value
	 * @param	integer		If set, update the mtime field to this value.
	 * @return	void
	 */
	function updateTstamp($phash,$mtime=0)	{
		$updateFields = array(
			'tstamp' => time()
		);
		if ($mtime)	{
			$updateFields['item_mtime'] = intval($mtime);
		}

		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
	}

	/**
	 * Update SetID of the index_phash record.
	 * The new value is taken from $this->conf['freeIndexSetId'].
	 *
	 * @param	integer		phash value
	 * @return	void
	 */
	function updateSetId($phash)	{
		$updateFields = array(
			'freeIndexSetId' => intval($this->conf['freeIndexSetId'])
		);

		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
	}

	/**
	 * Update parsetime for phash row.
	 *
	 * @param	integer		phash value.
	 * @param	integer		Parsetime value to set.
	 * @return	void
	 */
	function updateParsetime($phash,$parsetime)	{
		$updateFields = array(
			'parsetime' => intval($parsetime)
		);

		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
	}

	/**
	 * Update section rootline for the page
	 * All index_section rows of the current page ($this->conf['id']) get the
	 * root-line fields produced by getRootLineFields().
	 *
	 * @return	void
	 */
	function updateRootline()	{
		$updateFields = array();
		$this->getRootLineFields($updateFields);

		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields);
	}

	/**
	 * Adding values for root-line fields.
	 * rl0, rl1 and rl2 are standard. A hook might add more.
	 * A root-line level that is not present in $this->conf['rootline_uids'] is stored as 0.
	 *
	 * @param	array		Field array, passed by reference
	 * @return	void
	 */
	function getRootLineFields(&$fieldArr)	{

			// Standard fields rl0..rl2; isset() avoids notices when the rootline is shorter than three levels.
		for ($level = 0; $level < 3; $level++)	{
			$fieldArr['rl'.$level] = isset($this->conf['rootline_uids'][$level]) ? intval($this->conf['rootline_uids'][$level]) : 0;
		}

			// Additional fields configured as "field name => rootline level":
		if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])
				&& is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields']))	{
			foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel)	{
				$fieldArr[$fieldName] = isset($this->conf['rootline_uids'][$rootLineLevel]) ? intval($this->conf['rootline_uids'][$rootLineLevel]) : 0;
			}
		}
	}

	/**
	 * Removes any indexed pages with userlogins which has the same contentHash
	 * NOT USED anywhere inside this class!
	 *
	 * @return	void
	 */
	function removeLoginpagesWithContentHash()	{
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', '					A.phash=B.phash					AND A.phash_grouping='.intval($this->hash['phash_grouping']).'					AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).'					AND A.contentHash='.intval($this->content_md5h));
		while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
			$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under lo

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -