parser.php
来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页
PHP
2,095 行
$i += 2;
                    $trail = $m[2];
                }

                # The characters '<' and '>' (which were escaped by
                # removeHTMLtags()) should not be included in
                # URLs, per RFC 2396.
                if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
                    $trail = substr($url, $m2[0][1]) . $trail;
                    $url = substr($url, 0, $m2[0][1]);
                }

                # Move trailing punctuation to $trail
                $sep = ',;\.:!?';
                # If there is no left bracket, then consider right brackets fair game too
                if ( strpos( $url, '(' ) === false ) {
                    $sep .= ')';
                }

                # Count the run of separator characters at the end of the URL
                $numSepChars = strspn( strrev( $url ), $sep );
                if ( $numSepChars ) {
                    $trail = substr( $url, -$numSepChars ) . $trail;
                    $url = substr( $url, 0, -$numSepChars );
                }

                # Normalize any HTML entities in input. They will be
                # re-escaped by makeExternalLink() or maybeMakeExternalImage()
                $url = Sanitizer::decodeCharReferences( $url );

                # Escape any control characters introduced by the above step
                # NOTE(review): the /e (PREG_REPLACE_EVAL) modifier is deprecated as of
                # PHP 5.5 and removed in PHP 7.0; this call has to be converted to
                # preg_replace_callback() before the code can run on newer PHP.
                $url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url );

                # Is this an external image?
                $text = $this->maybeMakeExternalImage( $url );
                if ( $text === false ) {
                    # Not an image, make a link
                    $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
                    # Register it in the output object...
                    # Replace unnecessary URL escape codes with their equivalent characters
                    $pasteurized = Parser::replaceUnusualEscapes( $url );
                    $this->mOutput->addExternalLink( $pasteurized );
                }
                $s .= $text . $trail;
            } else {
                $s .= $protocol . $remainder;
            }
        }
        wfProfileOut( $fname );
        return $s;
    }

    /**
     * Replace unusual URL escape codes with their equivalent characters
     *
     * Every %XX sequence in the URL is passed to
     * Parser::replaceUnusualEscapesCallback(), which decides whether the
     * escape is kept or replaced by the character it encodes.
     *
     * @param string $url URL to process
     * @return string URL with the unneeded escapes decoded
     * @static
     * @fixme This can merge genuinely required bits in the path or query string,
     *        breaking legit URLs. A proper fix would treat the various parts of
     *        the URL differently; as a workaround, just use the output for
     *        statistical records, not for actual linking/output.
     */
    function replaceUnusualEscapes( $url ) {
        return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
            array( 'Parser', 'replaceUnusualEscapesCallback' ), $url );
    }

    /**
     * Callback function used in replaceUnusualEscapes().
     * Replaces unusual URL escape codes with their equivalent character
     *
     * @param array $matches Match array from preg_replace_callback();
     *        $matches[0] is the %XX escape sequence
     * @return string The decoded character when its byte value is between 33
     *         and 126 and it is not one of the characters listed below;
     *         otherwise the original escape sequence, unchanged
     * @static
     * @private
     */
    function replaceUnusualEscapesCallback( $matches ) {
        $char = urldecode( $matches[0] );
        $ord = ord( $char );
        // Is it an unsafe or HTTP reserved character according to RFC 1738?
        if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
            // No, shouldn't be escaped
            return $char;
        } else {
            // Yes, leave it escaped
            return $matches[0];
        }
    }

    /**
     * make an image if it's allowed, either through the global
     * option or through the exception
     *
     * The URL is turned into an image only when external images are allowed
     * in general, or when the URL starts with the configured
     * "allow external images from" prefix, and the URL matches
     * EXT_IMAGE_REGEX.
     *
     * @param string $url URL to examine
     * @return mixed Result of the skin's makeExternalImage() for the
     *         HTML-escaped URL, or false when no image was made
     * @private
     */
    function maybeMakeExternalImage( $url ) {
        $sk =& $this->mOptions->getSkin();
        $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
        $imagesexception = !empty($imagesfrom);
        $text = false;
        if ( $this->mOptions->getAllowExternalImages()
             || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) {
            if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
                # Image found
                $text = $sk->makeExternalImage( htmlspecialchars( $url ) );
            }
        }
        return $text;
    }

    /**
     * Process [[ ]] wikilinks
     *
     * The text is split on '[[' and each piece is handled in turn. Pieces
     * that do not parse as a link are copied through unchanged. Language
     * links and category links are recorded on $this->mOutput, image links
     * are rendered through makeImage(), self-links and media links go
     * through the skin, and all remaining links are appended via
     * makeKnownLinkHolder() / makeLinkHolder().
     *
     * @param string $s Text to process
     * @return string The processed text
     * @private
     */
    function replaceInternalLinks( $s ) {
        global $wgContLang;
        static $fname = 'Parser::replaceInternalLinks' ;

        wfProfileIn( $fname );

        wfProfileIn( $fname.'-setup' );
        static $tc = FALSE;
        # the % is needed to support urlencoded titles as well
        if ( !$tc ) { $tc = Title::legalChars() . '#%'; }

        $sk =& $this->mOptions->getSkin();

        #split the entire text string on occurrences of [[
        $a = explode( '[[', ' ' . $s );
        #get the first element (all text up to first [[), and remove the space we added
        $s = array_shift( $a );
        $s = substr( $s, 1 );

        # Match a link having the form [[namespace:link|alternate]]trail
        static $e1 = FALSE;
        if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; }
        # Match cases where there is no "]]", which might still be images
        static $e1_img = FALSE;
        if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
        # Match the end of a line for a word that's not followed by whitespace,
        # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
        $e2 = wfMsgForContent( 'linkprefix' );

        $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();

        if( is_null( $this->mTitle ) ) {
            throw new MWException( 'nooo' );
        }
        $nottalk = !$this->mTitle->isTalkPage();

        if ( $useLinkPrefixExtension ) {
            if ( preg_match( $e2, $s, $m ) ) {
                $first_prefix = $m[2];
            } else {
                $first_prefix = false;
            }
        } else {
            $prefix = '';
        }

        $selflink = $this->mTitle->getPrefixedText();
        wfProfileOut( $fname.'-setup' );

        $checkVariantLink = sizeof($wgContLang->getVariants())>1;
        $useSubpages = $this->areSubpagesAllowed();

        # Loop for each link
        for ($k = 0; isset( $a[$k] ); $k++) {
            $line = $a[$k];
            if ( $useLinkPrefixExtension ) {
                wfProfileIn( $fname.'-prefixhandling' );
                if ( preg_match( $e2, $s, $m ) ) {
                    $prefix = $m[2];
                    $s = $m[1];
                } else {
                    $prefix='';
                }
                # first link
                if($first_prefix) {
                    $prefix = $first_prefix;
                    $first_prefix = false;
                }
                wfProfileOut( $fname.'-prefixhandling' );
            }

            $might_be_img = false;

            if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
                $text = $m[2];
                # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
                # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
                # the real problem is with the $e1 regex
                # See bug 1300.
                #
                # Still some problems for cases where the ] is meant to be outside punctuation,
                # and no image is in sight. See bug 2095.
                #
                if( $text !== '' && preg_match( "/^\](.*)/s", $m[3], $n ) && strpos($text, '[') !== false ) {
                    $text .= ']'; # so that replaceExternalLinks($text) works later
                    $m[3] = $n[1];
                }
                # fix up urlencoded title texts
                if(preg_match('/%/', $m[1] ))
                    # Should anchors '#' also be rejected?
                    # urldecode() can reintroduce raw '<' and '>' (from %3C / %3E),
                    # so turn them back into HTML entities.
                    $m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );
                $trail = $m[3];
            } elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
                $might_be_img = true;
                $text = $m[2];
                if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
                $trail = "";
            } else { # Invalid form; output directly
                $s .= $prefix . '[[' . $line ;
                continue;
            }

            # Don't allow internal links to pages containing
            # PROTO: where PROTO is a valid URL protocol; these
            # should be external links.
            if (preg_match('/^(\b(?:' . wfUrlProtocols() . '))/', $m[1])) {
                $s .= $prefix . '[[' . $line ;
                continue;
            }

            # Make subpage if necessary
            if( $useSubpages ) {
                $link = $this->maybeDoSubpageLink( $m[1], $text );
            } else {
                $link = $m[1];
            }

            # A leading ':' forces plain-link treatment ($noforce is false then)
            $noforce = (substr($m[1], 0, 1) != ':');
            if (!$noforce) {
                # Strip off leading ':'
                $link = substr($link, 1);
            }

            $nt = Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );
            if( !$nt ) {
                $s .= $prefix . '[[' . $line;
                continue;
            }

            #check other language variants of the link
            #if the article does not exist
            if( $checkVariantLink
                && $nt->getArticleID() == 0 ) {
                $wgContLang->findVariantLink($link, $nt);
            }

            $ns = $nt->getNamespace();
            $iw = $nt->getInterWiki();

            if ($might_be_img) { # if this is actually an invalid link
                if ($ns == NS_IMAGE && $noforce) { #but might be an image
                    $found = false;
                    while (isset ($a[$k+1]) ) {
                        #look at the next 'line' to see if we can close it there
                        $spliced = array_splice( $a, $k + 1, 1 );
                        $next_line = array_shift( $spliced );
                        if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) {
                            # the first ]] closes the inner link, the second the image
                            $found = true;
                            $text .= '[[' . $m[1];
                            $trail = $m[2];
                            break;
                        } elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) {
                            #if there's exactly one ]] that's fine, we'll keep looking
                            $text .= '[[' . $m[0];
                        } else {
                            #if $next_line is invalid too, we need look no further
                            $text .= '[[' . $next_line;
                            break;
                        }
                    }
                    if ( !$found ) {
                        # we couldn't find the end of this imageLink, so output it raw
                        #but don't ignore what might be perfectly normal links in the text we've examined
                        $text = $this->replaceInternalLinks($text);
                        $s .= $prefix . '[[' . $link . '|' . $text;
                        # note: no $trail, because without an end, there *is* no trail
                        continue;
                    }
                } else { #it's not an image, so output it raw
                    $s .= $prefix . '[[' . $link . '|' . $text;
                    # note: no $trail, because without an end, there *is* no trail
                    continue;
                }
            }

            $wasblank = ( '' == $text );
            if( $wasblank ) $text = $link;

            # Link not escaped by : , create the various objects
            if( $noforce ) {

                # Interwikis
                if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
                    $this->mOutput->addLanguageLink( $nt->getFullText() );
                    $s = rtrim($s . "\n");
                    $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;
                    continue;
                }

                if ( $ns == NS_IMAGE ) {
                    wfProfileIn( "$fname-image" );
                    if ( !wfIsBadImage( $nt->getDBkey() ) ) {
                        # recursively parse links inside the image caption
                        # actually, this will parse them in any other parameters, too,
                        # but it might be hard to fix that, and it doesn't matter ATM
                        $text = $this->replaceExternalLinks($text);
                        $text = $this->replaceInternalLinks($text);

                        # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
                        $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail;
                        $this->mOutput->addImage( $nt->getDBkey() );

                        wfProfileOut( "$fname-image" );
                        continue;
                    } else {
                        # We still need to record the image's presence on the page
                        $this->mOutput->addImage( $nt->getDBkey() );
                    }
                    wfProfileOut( "$fname-image" );

                }

                if ( $ns == NS_CATEGORY ) {
                    wfProfileIn( "$fname-category" );
                    $s = rtrim($s . "\n"); # bug 87

                    # An empty link text means no explicit sort key was given:
                    # fall back to the current page's title.
                    if ( $wasblank ) {
                        if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
                            $sortkey = $this->mTitle->getText();
                        } else {
                            $sortkey = $this->mTitle->getPrefixedText();
                        }
                    } else {
                        $sortkey = $text;
                    }
                    $sortkey = Sanitizer::decodeCharReferences( $sortkey );
                    $sortkey = str_replace( "\n", '', $sortkey );
                    $sortkey = $wgContLang->convertCategoryKey( $sortkey );
                    $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

                    /**
                     * Strip the whitespace Category links produce, see bug 87
                     * @todo We might want to use trim($tmp, "\n") here.
                     */
                    $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;

                    wfProfileOut( "$fname-category" );
                    continue;
                }
            }

            if( ( $nt->getPrefixedText() === $selflink ) &&
                ( $nt->getFragment() === '' ) ) {
                # Self-links are handled specially; generally de-link and change to bold.
                $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
                continue;
            }

            # Special and Media are pseudo-namespaces; no pages actually exist in them
            if( $ns == NS_MEDIA ) {
                $link = $sk->makeMediaLinkObj( $nt, $text );
                # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
                $s .= $prefix . $this->armorLinks( $link ) . $trail;
                $this->mOutput->addImage( $nt->getDBkey() );
                continue;
            } elseif( $ns == NS_SPECIAL ) {
                $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
                continue;
            } elseif( $ns == NS_IMAGE ) {
                $img = Image::newFromTitle( $nt );
                if( $img->exists() ) {
                    // Force a blue link if the file exists; may be a remote
                    // upload on the shared repository, and we want to see its
                    // auto-generated page.
                    $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
                    continue;
                }
            }
            $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
        }
        wfProfileOut( $fname );
        return $s;
    }

    /**
     * Make a link placeholder. The text returned can be later resolved to a real link with
     * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
     * parsing of interwiki links, and secondly to allow all existence checks and
     * article length checks (for stub links) to be bundled into a single query.
     *
     */
    function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
        if ( ! is_object($nt) ) {
            # Fail gracefully
            $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
        } else {
            # Separate the link trail from the rest of the link
            list( $inside, $trail ) = Linker::splitTrail( $trail );

            if ( $nt->isExternal() ) {
                $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside );
                $this->mInterwikiLinkHolders['titles'][] = $nt;
                $retVal = '<!--IWLINK '. ($nr-1) ."-->{$trail}";
            } else {
                $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
                $this->mLinkHolders['dbkeys'][] = $nt->getDBkey();
                $this->mLinkHolders['queries'][] = $query;
                $this->mLinkHolders['texts'][] = $prefix.$text.$inside;
                $this->mLinkHolders['titles'][] = $nt;
                $retVal = '<!--LINK '. ($nr-1) ."-->{$trail}";
            }
        }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?