parser.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页

PHP
2,095
字号
					$i += 2;					$trail = $m[2];				}				# The characters '<' and '>' (which were escaped by				# removeHTMLtags()) should not be included in				# URLs, per RFC 2396.				if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {					$trail = substr($url, $m2[0][1]) . $trail;					$url = substr($url, 0, $m2[0][1]);				}				# Move trailing punctuation to $trail				$sep = ',;\.:!?';				# If there is no left bracket, then consider right brackets fair game too				if ( strpos( $url, '(' ) === false ) {					$sep .= ')';				}				$numSepChars = strspn( strrev( $url ), $sep );				if ( $numSepChars ) {					$trail = substr( $url, -$numSepChars ) . $trail;					$url = substr( $url, 0, -$numSepChars );				}				# Normalize any HTML entities in input. They will be				# re-escaped by makeExternalLink() or maybeMakeExternalImage()				$url = Sanitizer::decodeCharReferences( $url );								# Escape any control characters introduced by the above step				$url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url );				# Is this an external image?				$text = $this->maybeMakeExternalImage( $url );				if ( $text === false ) {					# Not an image, make a link					$text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );					# Register it in the output object...					# Replace unnecessary URL escape codes with their equivalent characters					$pasteurized = Parser::replaceUnusualEscapes( $url );					$this->mOutput->addExternalLink( $pasteurized );				}				$s .= $text . $trail;			} else {				$s .= $protocol . $remainder;			}		}		wfProfileOut( $fname );		return $s;	}	/**	 * Replace unusual URL escape codes with their equivalent characters	 * @param string	 * @return string	 * @static	 * @fixme This can merge genuinely required bits in the path or query string,	 *        breaking legit URLs. 
A proper fix would treat the various parts of
	 *        the URL differently; as a workaround, just use the output for
	 *        statistical records, not for actual linking/output.
	 */
	function replaceUnusualEscapes( $url ) {
		// Each %XX escape is handed to the callback, which decides whether
		// to decode it or to leave it as written.
		return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
			array( 'Parser', 'replaceUnusualEscapesCallback' ), $url );
	}

	/**
	 * Callback function used in replaceUnusualEscapes().
	 * Replaces unusual URL escape codes with their equivalent character.
	 *
	 * An escape is decoded only when it stands for a printable ASCII
	 * character (codes 33..126) that is neither "unsafe" nor HTTP-reserved
	 * according to RFC 1738. Everything else is returned exactly as matched.
	 *
	 * '%' itself is kept escaped: decoding "%25" would turn input such as
	 * "%2541" into "%41", which reads as an escaped "A" and therefore
	 * changes the meaning of the URL.
	 *
	 * @param array $matches Match array from preg_replace_callback();
	 *                       $matches[0] is the three-character "%XX" escape
	 * @return string The decoded character, or the original escape sequence
	 * @static
	 * @private
	 */
	function replaceUnusualEscapesCallback( $matches ) {
		$char = urldecode( $matches[0] );
		$ord = ord( $char );
		// Is it an unsafe or HTTP reserved character according to RFC 1738?
		// Unsafe: < > " # % { } | \ ^ ~ [ ] `   Reserved for HTTP: ; / ?
		if ( $ord > 32 && $ord < 127 && strpos( '<>"#%{}|\^~[]`;/?', $char ) === false ) {
			// No, shouldn't be escaped
			return $char;
		} else {
			// Yes, leave it escaped
			return $matches[0];
		}
	}

	/**
	 * Make an image if it's allowed, either through the global
	 * option or through the exception.
	 *
	 * External images are permitted when the parser options allow them
	 * globally, or when the "allow external images from" option is non-empty
	 * and $url begins with that value. In addition, $url has to match
	 * EXT_IMAGE_REGEX.
	 *
	 * @param string $url URL to test
	 * @return mixed Result of the skin's makeExternalImage() for the
	 *               HTML-escaped URL, or false if no image was made
	 * @private
	 */
	function maybeMakeExternalImage( $url ) {
		$sk =& $this->mOptions->getSkin();
		$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
		$imagesexception = !empty($imagesfrom);
		$text = false;
		if ( $this->mOptions->getAllowExternalImages()
		     || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) {
			if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
				# Image found
				$text = $sk->makeExternalImage( htmlspecialchars( $url ) );
			}
		}
		return $text;
	}

	/**
	 * Process [[ ]] wikilinks
	 *
	 * @private
	 */
	function replaceInternalLinks( $s ) {
		global $wgContLang;
		static $fname = 'Parser::replaceInternalLinks' ;

		wfProfileIn( $fname );

		wfProfileIn( $fname.'-setup' );
		static $tc = FALSE;
		# the % is needed to support urlencoded titles as well
		if ( !$tc ) { $tc = Title::legalChars() . '#%'; }

		$sk =& $this->mOptions->getSkin();

		#split the entire text string on occurences of [[
		$a = explode( '[[', ' ' . 
$s );		#get the first element (all text up to first [[), and remove the space we added		$s = array_shift( $a );		$s = substr( $s, 1 );		# Match a link having the form [[namespace:link|alternate]]trail		static $e1 = FALSE;		if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; }		# Match cases where there is no "]]", which might still be images		static $e1_img = FALSE;		if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }		# Match the end of a line for a word that's not followed by whitespace,		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched		$e2 = wfMsgForContent( 'linkprefix' );		$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();		if( is_null( $this->mTitle ) ) {			throw new MWException( 'nooo' );		}		$nottalk = !$this->mTitle->isTalkPage();		if ( $useLinkPrefixExtension ) {			if ( preg_match( $e2, $s, $m ) ) {				$first_prefix = $m[2];			} else {				$first_prefix = false;			}		} else {			$prefix = '';		}		$selflink = $this->mTitle->getPrefixedText();		wfProfileOut( $fname.'-setup' );		$checkVariantLink = sizeof($wgContLang->getVariants())>1;		$useSubpages = $this->areSubpagesAllowed();		# Loop for each link		for ($k = 0; isset( $a[$k] ); $k++) {			$line = $a[$k];			if ( $useLinkPrefixExtension ) {				wfProfileIn( $fname.'-prefixhandling' );				if ( preg_match( $e2, $s, $m ) ) {					$prefix = $m[2];					$s = $m[1];				} else {					$prefix='';				}				# first link				if($first_prefix) {					$prefix = $first_prefix;					$first_prefix = false;				}				wfProfileOut( $fname.'-prefixhandling' );			}			$might_be_img = false;			if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt				$text = $m[2];				# If we get a ] at the beginning of $m[3] that means we have a link that's something like:				# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,				# the real problem is with the $e1 regex				# See bug 1300.				
#				# Still some problems for cases where the ] is meant to be outside punctuation,				# and no image is in sight. See bug 2095.				#				if( $text !== '' && 					preg_match( "/^\](.*)/s", $m[3], $n ) && 					strpos($text, '[') !== false 				) 				{					$text .= ']'; # so that replaceExternalLinks($text) works later					$m[3] = $n[1];				}				# fix up urlencoded title texts				if(preg_match('/%/', $m[1] )) 					# Should anchors '#' also be rejected?					$m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );				$trail = $m[3];			} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption				$might_be_img = true;				$text = $m[2];				if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);				$trail = "";			} else { # Invalid form; output directly				$s .= $prefix . '[[' . $line ;				continue;			}			# Don't allow internal links to pages containing			# PROTO: where PROTO is a valid URL protocol; these			# should be external links.			if (preg_match('/^(\b(?:' . wfUrlProtocols() . '))/', $m[1])) {				$s .= $prefix . '[[' . $line ;				continue;			}			# Make subpage if necessary			if( $useSubpages ) {				$link = $this->maybeDoSubpageLink( $m[1], $text );			} else {				$link = $m[1];			}			$noforce = (substr($m[1], 0, 1) != ':');			if (!$noforce) {				# Strip off leading ':'				$link = substr($link, 1);			}			$nt = Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );			if( !$nt ) {				$s .= $prefix . '[[' . 
$line;				continue;			}			#check other language variants of the link			#if the article does not exist			if( $checkVariantLink			    && $nt->getArticleID() == 0 ) {				$wgContLang->findVariantLink($link, $nt);			}			$ns = $nt->getNamespace();			$iw = $nt->getInterWiki();			if ($might_be_img) { # if this is actually an invalid link				if ($ns == NS_IMAGE && $noforce) { #but might be an image					$found = false;					while (isset ($a[$k+1]) ) {						#look at the next 'line' to see if we can close it there						$spliced = array_splice( $a, $k + 1, 1 );						$next_line = array_shift( $spliced );						if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) {						# the first ]] closes the inner link, the second the image							$found = true;							$text .= '[[' . $m[1];							$trail = $m[2];							break;						} elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) {							#if there's exactly one ]] that's fine, we'll keep looking							$text .= '[[' . $m[0];						} else {							#if $next_line is invalid too, we need look no further							$text .= '[[' . $next_line;							break;						}					}					if ( !$found ) {						# we couldn't find the end of this imageLink, so output it raw						#but don't ignore what might be perfectly normal links in the text we've examined						$text = $this->replaceInternalLinks($text);						$s .= $prefix . '[[' . $link . '|' . $text;						# note: no $trail, because without an end, there *is* no trail						continue;					}				} else { #it's not an image, so output it raw					$s .= $prefix . '[[' . $link . '|' . $text;					# note: no $trail, because without an end, there *is* no trail					continue;				}			}			$wasblank = ( '' == $text );			if( $wasblank ) $text = $link;			# Link not escaped by : , create the various objects			if( $noforce ) {				# Interwikis				if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {					$this->mOutput->addLanguageLink( $nt->getFullText() );					$s = rtrim($s . 
"\n");					$s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;					continue;				}				if ( $ns == NS_IMAGE ) {					wfProfileIn( "$fname-image" );					if ( !wfIsBadImage( $nt->getDBkey() ) ) {						# recursively parse links inside the image caption						# actually, this will parse them in any other parameters, too,						# but it might be hard to fix that, and it doesn't matter ATM						$text = $this->replaceExternalLinks($text);						$text = $this->replaceInternalLinks($text);						# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them						$s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail;						$this->mOutput->addImage( $nt->getDBkey() );						wfProfileOut( "$fname-image" );						continue;					} else {						# We still need to record the image's presence on the page						$this->mOutput->addImage( $nt->getDBkey() );					}					wfProfileOut( "$fname-image" );				}				if ( $ns == NS_CATEGORY ) {					wfProfileIn( "$fname-category" );					$s = rtrim($s . "\n"); # bug 87					if ( $wasblank ) {						if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {							$sortkey = $this->mTitle->getText();						} else {							$sortkey = $this->mTitle->getPrefixedText();						}					} else {						$sortkey = $text;					}					$sortkey = Sanitizer::decodeCharReferences( $sortkey );					$sortkey = str_replace( "\n", '', $sortkey );					$sortkey = $wgContLang->convertCategoryKey( $sortkey );					$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );					/**					 * Strip the whitespace Category links produce, see bug 87					 * @todo We might want to use trim($tmp, "\n") here.					 */					$s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;					wfProfileOut( "$fname-category" );					continue;				}			}			if( ( $nt->getPrefixedText() === $selflink ) &&			    ( $nt->getFragment() === '' ) ) {				# Self-links are handled specially; generally de-link and change to bold.				$s .= $prefix . 
$sk->makeSelfLinkObj( $nt, $text, '', $trail );				continue;			}			# Special and Media are pseudo-namespaces; no pages actually exist in them			if( $ns == NS_MEDIA ) {				$link = $sk->makeMediaLinkObj( $nt, $text );				# Cloak with NOPARSE to avoid replacement in replaceExternalLinks				$s .= $prefix . $this->armorLinks( $link ) . $trail;				$this->mOutput->addImage( $nt->getDBkey() );				continue;			} elseif( $ns == NS_SPECIAL ) {				$s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );				continue;			} elseif( $ns == NS_IMAGE ) {				$img = Image::newFromTitle( $nt );				if( $img->exists() ) {					// Force a blue link if the file exists; may be a remote					// upload on the shared repository, and we want to see its					// auto-generated page.					$s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );					continue;				}			}			$s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );		}		wfProfileOut( $fname );		return $s;	}	/**	 * Make a link placeholder. The text returned can be later resolved to a real link with	 * replaceLinkHolders(). This is done for two reasons: firstly to avoid further	 * parsing of interwiki links, and secondly to allow all extistence checks and	 * article length checks (for stub links) to be bundled into a single query.	 *	 */	function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {		if ( ! is_object($nt) ) {			# Fail gracefully			$retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";		} else {			# Separate the link trail from the rest of the link			list( $inside, $trail ) = Linker::splitTrail( $trail );			if ( $nt->isExternal() ) {				$nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside );				$this->mInterwikiLinkHolders['titles'][] = $nt;				$retVal = '<!--IWLINK '. 
($nr-1) ."-->{$trail}";			} else {				$nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );				$this->mLinkHolders['dbkeys'][] = $nt->getDBkey();				$this->mLinkHolders['queries'][] = $query;				$this->mLinkHolders['texts'][] = $prefix.$text.$inside;				$this->mLinkHolders['titles'][] = $nt;				$retVal = '<!--LINK '. ($nr-1) ."-->{$trail}";			}		}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?