parser.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页

PHP
2,095
字号
						$y = "{$z}<{$l}>{$y[0]}" ;
					else {
						$attributes = $this->unstripForHTML( $y[0] );
						$y = "{$z}<{$l}".Sanitizer::fixTagAttributes($attributes, $l).">{$y[1]}" ;
					}
					$t[$k] .= $y ;
					array_push ( $td , true ) ;
				}
			}
		}

		# Closing open td, tr && table
		while ( count ( $td ) > 0 )
		{
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $td ) ) $t[] = '</td>' ;
			if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
			if ( !array_pop ( $has_opened_tr ) ) $t[] = "<tr><td></td></tr>" ;
			$t[] = '</table>' ;
		}

		$t = implode ( "\n" , $t ) ;
		# special case: don't return empty table
		if($t == "<table>\n<tr><td></td></tr>\n</table>")
			$t = '';
		wfProfileOut( $fname );
		return $t ;
	}

	/**
	 * Helper function for parse() that transforms wiki markup into
	 * HTML. Only called for $mOutputType == OT_HTML.
	 *
	 * The order of the passes below is significant: inclusion-control tags
	 * are dropped first, then HTML is sanitized, variables are replaced,
	 * tables are built, and only afterwards headings, quotes and links are
	 * converted.
	 *
	 * @param string $text wiki markup to transform
	 * @return string the transformed text
	 * @private
	 */
	function internalParse( $text ) {
		$args = array();
		$isMain = true;
		$fname = 'Parser::internalParse';
		wfProfileIn( $fname );

		# Strip the <onlyinclude> and <noinclude> tags themselves (their
		# content is kept) and remove <includeonly> sections entirely
		$text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) );
		$text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') );
		$text = preg_replace( '/<includeonly>.*?<\/includeonly>/s', '', $text );

		$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) );

		$text = $this->replaceVariables( $text, $args );

		// Tables need to come after variable replacement for things to work
		// properly; putting them before other transformations should keep
		// exciting things like link expansions from showing up in surprising
		// places.
		$text = $this->doTableStuff( $text );

		# Four or more dashes at the start of a line become a horizontal rule
		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->stripToc( $text );
		$this->stripNoGallery( $text );
		$text = $this->doHeadings( $text );
		if($this->mOptions->getUseDynamicDates()) {
			$df =& DateFormatter::getInstance();
			$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
		}
		$text = $this->doAllQuotes( $text );
		$text = $this->replaceInternalLinks( $text );
		$text = $this->replaceExternalLinks( $text );

		# replaceInternalLinks may sometimes leave behind
		# absolute URLs, which have to be masked to hide them from replaceExternalLinks.
		# The mask marker is removed again here, now that external links are done.
		$text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);

		$text = $this->doMagicLinks( $text );
		$text = $this->formatHeadings( $text, $isMain );

		wfProfileOut( $fname );
		return $text;
	}

	/**
	 * Replace special strings like "ISBN xxx" and "RFC xxx" with
	 * magic external links.
	 *
	 * Runs the ISBN pass, then the RFC pass, then the PMID pass. The text is
	 * taken by reference, so the caller's variable is overwritten as well as
	 * being returned.
	 *
	 * @param string $text text to process (modified in place)
	 * @return string the processed text, returned by reference
	 * @private
	 */
	function &doMagicLinks( &$text ) {
		$text = $this->magicISBN( $text );
		$text = $this->magicRFC( $text, 'RFC ', 'rfcurl' );
		$text = $this->magicRFC( $text, 'PMID ', 'pubmedurl' );
		return $text;
	}

	/**
	 * Parse headers and return html
	 *
	 * A line of the form "==Title==" (one to six equals signs on each side,
	 * optionally followed by whitespace) becomes "<hN>Title</hN>".
	 *
	 * @param string $text text to process
	 * @return string the text with heading lines converted
	 * @private
	 */
	function doHeadings( $text ) {
		$fname = 'Parser::doHeadings';
		wfProfileIn( $fname );
		# Go from the deepest level down to 1, so that a line with many equals
		# signs is matched at its own level before a shorter run could match it
		for ( $i = 6; $i >= 1; --$i ) {
			$h = str_repeat( '=', $i );
			# The pattern has exactly one capture group, so only \1 is used
			# in the replacement
			$text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
			  "<h{$i}>\\1</h{$i}>", $text );
		}
		wfProfileOut( $fname );
		return $text;
	}

	/**
	 * Replace single quotes with HTML markup
	 *
	 * The text is processed one line at a time, so bold/italic mark-up never
	 * spans a line break.
	 *
	 * @param string $text text to process
	 * @private
	 * @return string the altered text
	 */
	function doAllQuotes( $text ) {
		$fname = 'Parser::doAllQuotes';
		wfProfileIn( $fname );
		$outtext = '';
		$lines = explode( "\n", $text );
		foreach ( $lines as $line ) {
			$outtext .= $this->doQuotes ( $line ) . "\n";
		}
		# Drop the newline appended after the last line
		$outtext = substr($outtext, 0,-1);
		wfProfileOut( $fname );
		return $outtext;
	}

	/**
	 * Helper function for doAllQuotes()
	 *
	 * Converts runs of apostrophes in a single line into <i> and <b> tags.
	 * After splitting, even indexes of $arr hold plain text and odd indexes
	 * hold the apostrophe runs.
	 *
	 * @param string $text one line of text
	 * @return string the line with apostrophe mark-up converted to HTML
	 * @private
	 */
	function doQuotes( $text ) {
		$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		if ( count( $arr ) == 1 )
			return $text;
		else
		{
			# First, do some preliminary work. This may shift some apostrophes from
			# being mark-up to being text. It also counts the number of occurrences
			# of bold and italics mark-ups.
			$i = 0;
			$numbold = 0;
			$numitalics = 0;
			foreach ( $arr as $r )
			{
				if ( ( $i % 2 ) == 1 )
				{
					# If there are ever four apostrophes, assume the first is supposed to
					# be text, and the remaining three constitute mark-up for bold text.
					if ( strlen( $arr[$i] ) == 4 )
					{
						$arr[$i-1] .= "'";
						$arr[$i] = "'''";
					}
					# If there are more than 5 apostrophes in a row, assume they're all
					# text except for the last 5.
					else if ( strlen( $arr[$i] ) > 5 )
					{
						$arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
						$arr[$i] = "'''''";
					}
					# Count the number of occurrences of bold and italics mark-ups.
					# A sequence of five apostrophes counts once as bold and once
					# as italics.
					if ( strlen( $arr[$i] ) == 2 ) $numitalics++;  else
					if ( strlen( $arr[$i] ) == 3 ) $numbold++;     else
					if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
				}
				$i++;
			}

			# If there is an odd number of both bold and italics, it is likely
			# that one of the bold ones was meant to be an apostrophe followed
			# by italics. Which one we cannot know for certain, but it is more
			# likely to be one that has a single-letter word before it.
			if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
			{
				$i = 0;
				$firstsingleletterword = -1;
				$firstmultiletterword = -1;
				$firstspace = -1;
				foreach ( $arr as $r )
				{
					if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
					{
						# $x1 is the last character of the text before this
						# bold mark-up, $x2 the character before that
						$x1 = substr ($arr[$i-1], -1);
						$x2 = substr ($arr[$i-1], -2, 1);
						if ($x1 == ' ') {
							if ($firstspace == -1) $firstspace = $i;
						} else if ($x2 == ' ') {
							if ($firstsingleletterword == -1) $firstsingleletterword = $i;
						} else {
							if ($firstmultiletterword == -1) $firstmultiletterword = $i;
						}
					}
					$i++;
				}

				# If there is a single-letter word, use it!
				if ($firstsingleletterword > -1)
				{
					$arr [ $firstsingleletterword ] = "''";
					$arr [ $firstsingleletterword-1 ] .= "'";
				}
				# If not, but there's a multi-letter word, use that one.
				else if ($firstmultiletterword > -1)
				{
					$arr [ $firstmultiletterword ] = "''";
					$arr [ $firstmultiletterword-1 ] .= "'";
				}
				# ... otherwise use the first one that has neither.
				# (notice that it is possible for all three to be -1 if, for example,
				# there is only one pentuple-apostrophe in the line)
				else if ($firstspace > -1)
				{
					$arr [ $firstspace ] = "''";
					$arr [ $firstspace-1 ] .= "'";
				}
			}

			# Now let's actually convert our apostrophic mush to HTML!
			# $state records which tags are open and in which order:
			# '', 'i', 'b', 'ib' (<i> then <b>), 'bi' (<b> then <i>), or
			# 'both' (five apostrophes seen; the nesting order is not yet
			# known, so the following text is held back in $buffer).
			$output = '';
			$buffer = '';
			$state = '';
			$i = 0;
			foreach ($arr as $r)
			{
				if (($i % 2) == 0)
				{
					if ($state == 'both')
						$buffer .= $r;
					else
						$output .= $r;
				}
				else
				{
					if (strlen ($r) == 2)
					{
						if ($state == 'i')
						{ $output .= '</i>'; $state = ''; }
						else if ($state == 'bi')
						{ $output .= '</i>'; $state = 'b'; }
						else if ($state == 'ib')
						{ $output .= '</b></i><b>'; $state = 'b'; }
						else if ($state == 'both')
						{ $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
						else # $state can be 'b' or ''
						{ $output .= '<i>'; $state .= 'i'; }
					}
					else if (strlen ($r) == 3)
					{
						if ($state == 'b')
						{ $output .= '</b>'; $state = ''; }
						else if ($state == 'bi')
						{ $output .= '</i></b><i>'; $state = 'i'; }
						else if ($state == 'ib')
						{ $output .= '</b>'; $state = 'i'; }
						else if ($state == 'both')
						{ $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
						else # $state can be 'i' or ''
						{ $output .= '<b>'; $state .= 'b'; }
					}
					else if (strlen ($r) == 5)
					{
						if ($state == 'b')
						{ $output .= '</b><i>'; $state = 'i'; }
						else if ($state == 'i')
						{ $output .= '</i><b>'; $state = 'b'; }
						else if ($state == 'bi')
						{ $output .= '</i></b>'; $state = ''; }
						else if ($state == 'ib')
						{ $output .= '</b></i>'; $state = ''; }
						else if ($state == 'both')
						{ $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
						else # ($state == '')
						{ $buffer = ''; $state = 'both'; }
					}
				}
				$i++;
			}
			# Now close all remaining tags.  Notice that the order is important.
			if ($state == 'b' || $state == 'ib')
				$output .= '</b>';
			if ($state == 'i' || $state == 'bi' || $state == 'ib')
				$output .= '</i>';
			if ($state == 'bi')
				$output .= '</b>';
			if ($state == 'both')
				$output .= '<b><i>'.$buffer.'</i></b>';
			return $output;
		}
	}

	/**
	 * Replace external links
	 *
	 * Handles bracketed links such as [http://example.com text]. The text
	 * before the first bracketed link, and the text after each one, is passed
	 * through replaceFreeExternalLinks() so that bare URLs are linked too.
	 * Every bracketed URL is also registered with the output object.
	 *
	 * Note: this is all very hackish and the order of execution matters a lot.
	 * Make sure to run maintenance/parserTests.php if you change this code.
	 *
	 * @param string $text text to process
	 * @return string the text with external links replaced
	 * @private
	 */
	function replaceExternalLinks( $text ) {
		global $wgContLang;
		$fname = 'Parser::replaceExternalLinks';
		wfProfileIn( $fname );

		$sk =& $this->mOptions->getSkin();

		# After the leading text is shifted off, $bits is consumed in groups
		# of four: url, protocol, link text, trail
		$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

		$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

		$i = 0;
		while ( $i<count( $bits ) ) {
			$url = $bits[$i++];
			$protocol = $bits[$i++];
			$text = $bits[$i++];
			$trail = $bits[$i++];

			# The characters '<' and '>' (which were escaped by
			# removeHTMLtags()) should not be included in
			# URLs, per RFC 2396.
			if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
				# Cut the URL at the entity and move the rest into the link text
				$text = substr($url, $m2[0][1]) . ' ' . $text;
				$url = substr($url, 0, $m2[0][1]);
			}

			# If the link text is an image URL, replace it with an <img> tag
			# This happened by accident in the original parser, but some people used it extensively
			$img = $this->maybeMakeExternalImage( $text );
			if ( $img !== false ) {
				$text = $img;
			}

			$dtrail = '';

			# Set linktype for CSS - if URL==text, link is essentially free
			$linktype = ($text == $url) ? 'free' : 'text';

			# No link text, e.g. [http://domain.tld/some.link]
			if ( $text == '' ) {
				# Autonumber if allowed. See bug #5918
				if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) {
					$text = '[' . ++$this->mAutonumber . ']';
					$linktype = 'autonumber';
				} else {
					# Otherwise just use the URL
					$text = htmlspecialchars( $url );
					$linktype = 'free';
				}
			} else {
				# Have link text, e.g. [http://domain.tld/some.link text]s
				# Check for trail
				list( $dtrail, $trail ) = Linker::splitTrail( $trail );
			}

			$text = $wgContLang->markNoConversion($text);

			# Normalize any HTML entities in input. They will be
			# re-escaped by makeExternalLink().
			$url = Sanitizer::decodeCharReferences( $url );

			# Escape any control characters introduced by the above step.
			# A callback is used instead of the /e modifier: /e is deprecated
			# in PHP 5.5 and removed in PHP 7, and it backslash-escapes quotes
			# in the match before evaluation, so '"' came out as %5C%22
			# instead of %22.
			$url = preg_replace_callback( '/[\][<>"\\x00-\\x20\\x7F]/',
				array( &$this, 'urlencodeMatchCallback' ), $url );

			# Process the trail (i.e. everything after this link up until start of the next link),
			# replacing any non-bracketed links
			$trail = $this->replaceFreeExternalLinks( $trail );

			# Use the encoded URL
			# This means that users can paste URLs directly into the text
			# Funny characters like &ouml; aren't valid in URLs anyway
			# This was changed in August 2004
			$s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail;

			# Register link in the output object.
			# Replace unnecessary URL escape codes with the referenced character
			# This prevents spammers from hiding links from the filters
			$pasteurized = Parser::replaceUnusualEscapes( $url );
			$this->mOutput->addExternalLink( $pasteurized );
		}

		wfProfileOut( $fname );
		return $s;
	}

	/**
	 * Callback for replaceExternalLinks(): URL-encode one matched character.
	 *
	 * @param array $matches match array from preg_replace_callback();
	 *                       $matches[0] is the matched character
	 * @return string the URL-encoded form of the match
	 * @private
	 */
	function urlencodeMatchCallback( $matches ) {
		return urlencode( $matches[0] );
	}

	/**
	 * Replace anything that looks like a URL with a link
	 * @private
	 */
	function replaceFreeExternalLinks( $text ) {
		global $wgContLang;
		$fname = 'Parser::replaceFreeExternalLinks';
		wfProfileIn( $fname );

		$bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		$s = array_shift( $bits );
		$i = 0;

		$sk =& $this->mOptions->getSkin();

		while ( $i < count( $bits ) ){
			$protocol = $bits[$i++];
			$remainder = $bits[$i++];

			if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
				# Found some characters after the protocol that look promising
				$url = $protocol . $m[1];
				$trail = $m[2];

				# special case: handle urls as url args:
				# http://www.example.com/foo?=http://www.example.com/bar
				if(strlen($trail) == 0 &&
					isset($bits[$i]) &&
					preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) &&
					preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
				{
					# add protocol, arg
					$url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?