parser.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页

PHP
2,095
字号
<?php
/**
 * File for Parser and related classes
 *
 * @package MediaWiki
 * @subpackage Parser
 */

/**
 * Update this version number when the ParserOutput format
 * changes in an incompatible way, so the parser cache
 * can automatically discard old data.
 */
define( 'MW_PARSER_VERSION', '1.6.1' );

/**
 * Variable substitution O(N^2) attack
 *
 * Without countermeasures, it would be possible to attack the parser by saving
 * a page filled with a large number of inclusions of large pages. The size of
 * the generated page would be proportional to the square of the input size.
 * Hence, we limit the number of inclusions of any given page, thus bringing any
 * attack back to O(N).
 */
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

define( 'RLH_FOR_UPDATE', 1 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# Flags for setFunctionHook
define( 'SFH_NO_HASH', 1 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Constants needed for external link processing
define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' );
# Including space, but excluding newlines
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Bracketed external link: "[" protocol + URL characters, optional spaces,
# then the (lazily matched) link text up to the closing "]".
# The protocol alternation comes from wfUrlProtocols() at definition time.
define( 'EXT_LINK_BRACKETED',  '/\[(\b(' . wfUrlProtocols() . ')'.
	EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
define( 'EXT_IMAGE_REGEX',
	'/^('.HTTP_PROTOCOLS.')'.  # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);

// State constants for the definition list colon extraction
define( 'MW_COLON_STATE_TEXT', 0 );
define( 'MW_COLON_STATE_TAG', 1 );
define( 'MW_COLON_STATE_TAGSTART', 2 );
define( 'MW_COLON_STATE_CLOSETAG', 3 );
define( 'MW_COLON_STATE_TAGSLASH', 4 );
define( 'MW_COLON_STATE_COMMENT', 5 );
define( 'MW_COLON_STATE_COMMENTDASH', 6 );
define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 );

/**
 * PHP Parser
 *
 * Processes wiki markup
 *
 * <pre>
 * There are three main entry points into the Parser class:
 * parse()
 *   produces HTML output
 * preSaveTransform().
 *   produces altered wiki markup.
 * transformMsg()
 *   performs brace substitution on MediaWiki messages
 *
 * Globals used:
 *    objects:   $wgLang, $wgContLang
 *
 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
 *
 * settings:
 *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
 *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
 *  $wgLocaltimezone, $wgAllowSpecialInclusion*
 *
 *  * only within ParserOptions
 * </pre>
 *
 * @package MediaWiki
 */
class Parser
{
	/**#@+
	 * @private
	 */
	# Persistent:
	var $mTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
	var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix;
	var $mTemplates,	// cache of already loaded templates, avoids
				// multiple SQL queries for the same string
	    $mTemplatePath;	// stores an unsorted hash of all the templates already loaded
				// in this path. Used for loop detection.
# Temporary
	# These are variables reset at least once per parse regardless of $clearState
	var $mOptions,      // ParserOptions object
		$mTitle,        // Title context, used for self-link rendering and similar things
		$mOutputType,   // Output type, one of the OT_xxx constants
		$mRevisionId;   // ID to display in {{REVISIONID}} tags

	/**#@-*/

	/**
	 * Constructor
	 *
	 * Only sets up empty hook tables and flags the parser as not yet
	 * initialised; the real setup is deferred to firstCallInit().
	 *
	 * @public
	 */
	function Parser() {
		$this->mTagHooks = array();
		$this->mFunctionHooks = array();
		$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
		$this->mFirstCall = true;
	}

	/**
	 * Do various kinds of initialisation on the first call of the parser
	 *
	 * Registers the <pre> tag hook and the core parser function hooks, then
	 * initialises the variables. Does nothing once mFirstCall has been cleared.
	 */
	function firstCallInit() {
		if ( !$this->mFirstCall ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;

		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );

		$this->setFunctionHook( MAG_NS, array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_URLENCODE, array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LCFIRST, array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_UCFIRST, array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LC, array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_UC, array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LOCALURL, array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LOCALURLE, array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_FULLURL, array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_FULLURLE, array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_FORMATNUM, array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_GRAMMAR, array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_PLURAL, array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFPAGES, array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFUSERS, array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFARTICLES, array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFFILES, array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFADMINS, array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LANGUAGE, array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH );

		# Optional functions, only registered when enabled by configuration
		if ( $wgAllowDisplayTitle ) {
			$this->setFunctionHook( MAG_DISPLAYTITLE, array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH );
		}
		if ( $wgAllowSlowParserFunctions ) {
			$this->setFunctionHook( MAG_PAGESINNAMESPACE, array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH );
		}

		$this->initialiseVariables();
		$this->mFirstCall = false;
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Clear Parser state
	 *
	 * Runs firstCallInit() if it has not run yet, resets all per-parse
	 * members, generates a fresh unique marker prefix and finally fires the
	 * 'ParserClearState' hook.
	 *
	 * @private
	 */
	function clearState() {
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = '';
		$this->mDTopen = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
		$this->mInterwikiLinkHolders = array(
			'texts' => array(),
			'titles' => array()
		);
		$this->mLinkHolders = array(
			'namespaces' => array(),
			'dbkeys' => array(),
			'queries' => array(),
			'texts' => array(),
			'titles' => array()
		);
		$this->mRevisionId = null;

		/**
		 * Prefix for temporary replacement strings for the multipass parser.
		 * \x07 should never appear in input as it's disallowed in XML.
		 * Using it at the front also gives us a little extra robustness
		 * since it shouldn't match when butted up against identifier-like
		 * string constructs.
		 */
		$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();

		# Clear these on every parse, bug 4549
		$this->mTemplates = array();
		$this->mTemplatePath = array();

		$this->mShowToc = true;
		$this->mForceTocPosition = false;

		wfRunHooks( 'ParserClearState', array( &$this ) );
	}

	/**
	 * Accessor for mUniqPrefix.
	 *
	 * @public
	 */
	function UniqPrefix() {
		return $this->mUniqPrefix;
	}

	/**
	 * Convert wikitext to HTML
	 * Do not call this function recursively.
	 *
	 * @private
	 * @param string $text Text we want to parse
	 * @param Title &$title A title object
	 * @param ParserOptions $options
	 * @param boolean $linestart
	 * @param boolean $clearState
	 * @param int $revid number to pass in {{REVISIONID}}
	 * @return ParserOutput a ParserOutput; if a 'ParserBeforeInternalParse'
	 *         hook returns false, the stripped text string is returned instead
	 */
	function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) {
		/**
		 * First pass--just handle <nowiki> sections, pass the rest off
		 * to internalParse() which does all the real work.
		 */

		global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
		$fname = 'Parser::parse';
		wfProfileIn( $fname );

		if ( $clearState ) {
			$this->clearState();
		}

		$this->mOptions = $options;
		$this->mTitle =& $title;
		$this->mRevisionId = $revid;
		$this->mOutputType = OT_HTML;

		//$text = $this->strip( $text, $this->mStripState );
		// VOODOO MAGIC FIX! Sometimes the above segfaults in PHP5.
		$x =& $this->mStripState;

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$x ) );
		$text = $this->strip( $text, $x );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) );

		# Hook to suspend the parser in this state
		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) {
			wfProfileOut( $fname );
			return $text ;
		}

		$text = $this->internalParse( $text );

		$text = $this->unstrip( $text, $this->mStripState );

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		# NOTE(review): the first pattern has a single capturing group (the
		# lookahead does not capture), so \\2 in its replacement appears to
		# always expand to nothing - confirm before relying on it.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&nbsp;',
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

		# only once and last
		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		# the position of the parserConvert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		# Side-effects: this calls $this->mOutput->setTitleText()
		$text = $wgContLang->parserConvert( $text, $this );

		$text = $this->unstripNoWiki( $text, $this->mStripState );

		wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

		$text = Sanitizer::normalizeCharReferences( $text );

		if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
			$text = Parser::tidy($text);
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = array(
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => ''
			);

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}

		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

		$this->mOutput->setText( $text );
		wfProfileOut( $fname );

		return $this->mOutput;
	}

	/**
	 * Get a random string
	 *
	 * Concatenates the hexadecimal form of two mt_rand() values.
	 *
	 * @private
	 * @static
	 */
	function getRandomString() {
		return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
	}

	/** Accessor for mTitle (returned by reference). */
	function &getTitle() { return $this->mTitle; }

	/** Accessor for mOptions. */
	function getOptions() { return $this->mOptions; }

	/**
	 * Pick the language object for parser functions: $wgLang when the
	 * options' getInterfaceMessage() is true, $wgContLang otherwise.
	 */
	function getFunctionLang() {
		global $wgLang, $wgContLang;
		return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
	}

	/**
	 * Replaces all occurrences of HTML-style comments and the given tags
	 * in the text with a random marker and returns the next text. The output
	 * parameter $matches will be an associative array filled with data in
	 * the form:
	 *   'UNIQ-xxxxx' => array(
	 *     'element',
	 *     'tag content',
	 *     array( 'param' => 'x' ),
	 *     '<element param="x">tag content</element>' ) )
	 *
	 * @param $elements list of element names. Comments are always extracted.
	 * @param $text Source text string.
	 * @param $uniq_prefix
	 *
	 * @private
	 * @static
	 */
	function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
		$rand = Parser::getRandomString();
		$n = 1;
		$stripped = '';
		$matches = array();

		$taglist = implode( '|', $elements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

		while ( '' != $text ) {
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if( count( $p ) < 5 ) {
				break;
			}
			if( count( $p ) > 5 ) {
				// comment
				$element    = $p[4];
				$attributes = '';
				$close      = '';
				$inside     = $p[5];
			} else {
				// tag
				$element    = $p[1];
				$attributes = $p[2];
				$close      = $p[3];
				$inside     = $p[4];

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?