parser.php
来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页
PHP
2,095 行
$y = "{$z}<{$l}>{$y[0]}" ;
					else {
						$attributes = $this->unstripForHTML( $y[0] );
						$y = "{$z}<{$l}".Sanitizer::fixTagAttributes($attributes, $l).">{$y[1]}" ;
					}

					$t[$k] .= $y ;
					array_push ( $td , true ) ;
				}
			}
		}

		# Closing open td, tr && table
		while ( count ( $td ) > 0 )
		{
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $td ) ) $t[] = '</td>' ;
			if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
			if ( !array_pop ( $has_opened_tr ) ) $t[] = "<tr><td></td></tr>" ;
			$t[] = '</table>' ;
		}

		$t = implode ( "\n" , $t ) ;
		# special case: don't return empty table
		if($t == "<table>\n<tr><td></td></tr>\n</table>")
			$t = '';
		wfProfileOut( $fname );
		return $t ;
	}

	/**
	 * Helper function for parse() that transforms wiki markup into
	 * HTML. Only called for $mOutputType == OT_HTML.
	 *
	 * The passes below run in a fixed order; several of them depend on the
	 * output of an earlier pass (see the inline comments), so do not reorder
	 * them casually.
	 *
	 * @param string $text wikitext to transform
	 * @return string the transformed text
	 * @private
	 */
	function internalParse( $text ) {
		$args = array();
		$isMain = true;
		$fname = 'Parser::internalParse';
		wfProfileIn( $fname );

		# Remove <noinclude> tags and <includeonly> sections
		$text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) );
		$text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') );
		$text = preg_replace( '/<includeonly>.*?<\/includeonly>/s', '', $text );

		$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) );

		$text = $this->replaceVariables( $text, $args );

		// Tables need to come after variable replacement for things to work
		// properly; putting them before other transformations should keep
		// exciting things like link expansions from showing up in surprising
		// places.
		$text = $this->doTableStuff( $text );

		# A line starting with four or more dashes becomes a horizontal rule
		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->stripToc( $text );
		# Return value is not used here; presumably stripNoGallery() takes
		# $text by reference -- TODO confirm against its definition.
		$this->stripNoGallery( $text );
		$text = $this->doHeadings( $text );
		if($this->mOptions->getUseDynamicDates()) {
			$df =& DateFormatter::getInstance();
			$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
		}
		$text = $this->doAllQuotes( $text );
		$text = $this->replaceInternalLinks( $text );
		$text = $this->replaceExternalLinks( $text );

		# replaceInternalLinks may sometimes leave behind
		# absolute URLs, which have to be masked to hide them from replaceExternalLinks
		$text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);

		$text = $this->doMagicLinks( $text );
		$text = $this->formatHeadings( $text, $isMain );

		wfProfileOut( $fname );
		return $text;
	}

	/**
	 * Replace special strings like "ISBN xxx" and "RFC xxx" with
	 * magic external links.
	 *
	 * Runs magicISBN() first, then magicRFC() twice: once for the 'RFC '
	 * keyword with the 'rfcurl' key and once for 'PMID ' with 'pubmedurl'.
	 *
	 * @param string $text text to scan; passed by reference and overwritten
	 * @return string the same (modified) text, returned by reference
	 * @private
	 */
	function &doMagicLinks( &$text ) {
		$text = $this->magicISBN( $text );
		$text = $this->magicRFC( $text, 'RFC ', 'rfcurl' );
		$text = $this->magicRFC( $text, 'PMID ', 'pubmedurl' );
		return $text;
	}

	/**
	 * Parse headers and return html
	 *
	 * A line of the form "== title ==" (one to six '=' on each side,
	 * optionally followed by whitespace) becomes <hN>title</hN>.
	 *
	 * @param string $text
	 * @return string text with heading lines converted
	 * @private
	 */
	function doHeadings( $text ) {
		$fname = 'Parser::doHeadings';
		wfProfileIn( $fname );
		# Work from level 6 down to level 1 so that the longest run of '='
		# is consumed first.
		for ( $i = 6; $i >= 1; --$i ) {
			$h = str_repeat( '=', $i );
			# The pattern has a single capture group (the heading text), so
			# the replacement refers to \1 only.
			$text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
			  "<h{$i}>\\1</h{$i}>", $text );
		}
		wfProfileOut( $fname );
		return $text;
	}

	/**
	 * Replace single quotes with HTML markup
	 *
	 * Quote markup never spans lines, so the text is split on "\n", each
	 * line is passed through doQuotes(), and the lines are joined again.
	 *
	 * @param string $text
	 * @private
	 * @return string the altered text
	 */
	function doAllQuotes( $text ) {
		$fname = 'Parser::doAllQuotes';
		wfProfileIn( $fname );
		$lines = explode( "\n", $text );
		foreach ( $lines as $idx => $line ) {
			$lines[$idx] = $this->doQuotes( $line );
		}
		$outtext = implode( "\n", $lines );
		wfProfileOut( $fname );
		return $outtext;
	}

	/**
	 * Helper function for doAllQuotes()
	 *
	 * Converts runs of apostrophes in a single line into <i>/<b> markup.
	 * The line is split so that even indexes of $arr hold plain text and odd
	 * indexes hold the apostrophe runs. Processing happens in three phases:
	 *  1. normalise runs of 4 or more than 5 apostrophes and count the
	 *     bold/italic markers;
	 *  2. if both counts are odd, demote one bold marker to
	 *     "apostrophe + italic";
	 *  3. walk the pieces with a small state machine and emit the tags.
	 *
	 * @param string $text a single line of text
	 * @return string the line with quote markup replaced by HTML
	 * @private
	 */
	function doQuotes( $text ) {
		$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		if ( count( $arr ) == 1 )
			return $text;
		else
		{
			# First, do some preliminary work. This may shift some apostrophes from
			# being mark-up to being text. It also counts the number of occurrences
			# of bold and italics mark-ups.
			$i = 0;
			$numbold = 0;
			$numitalics = 0;
			foreach ( $arr as $r )
			{
				if ( ( $i % 2 ) == 1 )
				{
					# If there are ever four apostrophes, assume the first is supposed to
					# be text, and the remaining three constitute mark-up for bold text.
					if ( strlen( $arr[$i] ) == 4 )
					{
						$arr[$i-1] .= "'";
						$arr[$i] = "'''";
					}
					# If there are more than 5 apostrophes in a row, assume they're all
					# text except for the last 5.
					else if ( strlen( $arr[$i] ) > 5 )
					{
						$arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
						$arr[$i] = "'''''";
					}
					# Count the number of occurrences of bold and italics mark-ups.
					# We are not counting sequences of five apostrophes.
					if ( strlen( $arr[$i] ) == 2 ) $numitalics++;
					else if ( strlen( $arr[$i] ) == 3 ) $numbold++;
					else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
				}
				$i++;
			}

			# If there is an odd number of both bold and italics, it is likely
			# that one of the bold ones was meant to be an apostrophe followed
			# by italics. Which one we cannot know for certain, but it is more
			# likely to be one that has a single-letter word before it.
			if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
			{
				$i = 0;
				$firstsingleletterword = -1;
				$firstmultiletterword = -1;
				$firstspace = -1;
				foreach ( $arr as $r )
				{
					if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
					{
						# $x1 is the last character of the preceding text,
						# $x2 the one before it.
						$x1 = substr ($arr[$i-1], -1);
						$x2 = substr ($arr[$i-1], -2, 1);
						if ($x1 == ' ') {
							if ($firstspace == -1) $firstspace = $i;
						} else if ($x2 == ' ') {
							if ($firstsingleletterword == -1) $firstsingleletterword = $i;
						} else {
							if ($firstmultiletterword == -1) $firstmultiletterword = $i;
						}
					}
					$i++;
				}

				# If there is a single-letter word, use it!
				if ($firstsingleletterword > -1)
				{
					$arr [ $firstsingleletterword ] = "''";
					$arr [ $firstsingleletterword-1 ] .= "'";
				}
				# If not, but there's a multi-letter word, use that one.
				else if ($firstmultiletterword > -1)
				{
					$arr [ $firstmultiletterword ] = "''";
					$arr [ $firstmultiletterword-1 ] .= "'";
				}
				# ... otherwise use the first one that has neither.
				# (notice that it is possible for all three to be -1 if, for example,
				# there is only one pentuple-apostrophe in the line)
				else if ($firstspace > -1)
				{
					$arr [ $firstspace ] = "''";
					$arr [ $firstspace-1 ] .= "'";
				}
			}

			# Now let's actually convert our apostrophic mush to HTML!
			# $state names the currently open tags in opening order ('', 'i',
			# 'b', 'bi', 'ib'); 'both' means a five-apostrophe run was seen and
			# the nesting order is not yet known, so text is held in $buffer.
			$output = '';
			$buffer = '';
			$state = '';
			$i = 0;
			foreach ($arr as $r)
			{
				if (($i % 2) == 0)
				{
					if ($state == 'both')
						$buffer .= $r;
					else
						$output .= $r;
				}
				else
				{
					if (strlen ($r) == 2)
					{
						if ($state == 'i')
						{ $output .= '</i>'; $state = ''; }
						else if ($state == 'bi')
						{ $output .= '</i>'; $state = 'b'; }
						else if ($state == 'ib')
						{ $output .= '</b></i><b>'; $state = 'b'; }
						else if ($state == 'both')
						{ $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
						else # $state can be 'b' or ''
						{ $output .= '<i>'; $state .= 'i'; }
					}
					else if (strlen ($r) == 3)
					{
						if ($state == 'b')
						{ $output .= '</b>'; $state = ''; }
						else if ($state == 'bi')
						{ $output .= '</i></b><i>'; $state = 'i'; }
						else if ($state == 'ib')
						{ $output .= '</b>'; $state = 'i'; }
						else if ($state == 'both')
						{ $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
						else # $state can be 'i' or ''
						{ $output .= '<b>'; $state .= 'b'; }
					}
					else if (strlen ($r) == 5)
					{
						if ($state == 'b')
						{ $output .= '</b><i>'; $state = 'i'; }
						else if ($state == 'i')
						{ $output .= '</i><b>'; $state = 'b'; }
						else if ($state == 'bi')
						{ $output .= '</i></b>'; $state = ''; }
						else if ($state == 'ib')
						{ $output .= '</b></i>'; $state = ''; }
						else if ($state == 'both')
						{ $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
						else # ($state == '')
						{ $buffer = ''; $state = 'both'; }
					}
				}
				$i++;
			}
			# Now close all remaining tags.  Notice that the order is important.
			if ($state == 'b' || $state == 'ib')
				$output .= '</b>';
			if ($state == 'i' || $state == 'bi' || $state == 'ib')
				$output .= '</i>';
			if ($state == 'bi')
				$output .= '</b>';
			if ($state == 'both')
				$output .= '<b><i>'.$buffer.'</i></b>';
			return $output;
		}
	}

	/**
	 * Replace external links
	 *
	 * Note: this is all very hackish and the order of execution matters a lot.
	 * Make sure to run maintenance/parserTests.php if you change this code.
*
	 * Splits the text on bracketed external links, turns each one into a
	 * link via the skin, registers it with the parser output, and runs the
	 * text between links through replaceFreeExternalLinks().
	 *
	 * @param string $text
	 * @return string text with external links replaced
	 * @private
	 */
	function replaceExternalLinks( $text ) {
		global $wgContLang;
		$fname = 'Parser::replaceExternalLinks';
		wfProfileIn( $fname );

		$sk =& $this->mOptions->getSkin();

		# Translation table for bytes that must not appear raw in a URL:
		# '[', ']', '<', '>', '"', 0x00-0x20 and 0x7F. Built once, outside
		# the loop. This replaces a preg_replace() call that relied on the
		# /e (eval) modifier, which no longer exists in PHP 7+ and which
		# backslash-escaped '"' and NUL before encoding them.
		$urlEscapes = array(
			'[' => '%5B',
			']' => '%5D',
			'<' => '%3C',
			'>' => '%3E',
			'"' => '%22',
			"\x7F" => '%7F'
		);
		for ( $byte = 0x00; $byte <= 0x20; $byte++ ) {
			$urlEscapes[chr( $byte )] = urlencode( chr( $byte ) );
		}

		# $bits is: leading text, then repeating groups of
		# ( url, protocol, link text, trail )
		$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

		$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

		$i = 0;
		while ( $i<count( $bits ) ) {
			$url = $bits[$i++];
			$protocol = $bits[$i++];
			$text = $bits[$i++];
			$trail = $bits[$i++];

			# The characters '<' and '>' (which were escaped by
			# removeHTMLtags()) should not be included in
			# URLs, per RFC 2396.
			if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
				$text = substr($url, $m2[0][1]) . ' ' . $text;
				$url = substr($url, 0, $m2[0][1]);
			}

			# If the link text is an image URL, replace it with an <img> tag
			# This happened by accident in the original parser, but some people used it extensively
			$img = $this->maybeMakeExternalImage( $text );
			if ( $img !== false ) {
				$text = $img;
			}

			$dtrail = '';

			# Set linktype for CSS - if URL==text, link is essentially free
			$linktype = ($text == $url) ? 'free' : 'text';

			# No link text, e.g. [http://domain.tld/some.link]
			if ( $text == '' ) {
				# Autonumber if allowed. See bug #5918
				if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) {
					$text = '[' . ++$this->mAutonumber . ']';
					$linktype = 'autonumber';
				} else {
					# Otherwise just use the URL
					$text = htmlspecialchars( $url );
					$linktype = 'free';
				}
			} else {
				# Have link text, e.g. [http://domain.tld/some.link text]s
				# Check for trail
				list( $dtrail, $trail ) = Linker::splitTrail( $trail );
			}

			$text = $wgContLang->markNoConversion($text);

			# Normalize any HTML entities in input. They will be
			# re-escaped by makeExternalLink().
			$url = Sanitizer::decodeCharReferences( $url );

			# Escape any control characters introduced by the above step
			$url = strtr( $url, $urlEscapes );

			# Process the trail (i.e. everything after this link up until start of the next link),
			# replacing any non-bracketed links
			$trail = $this->replaceFreeExternalLinks( $trail );

			# Use the encoded URL
			# This means that users can paste URLs directly into the text
			# Funny characters like ö aren't valid in URLs anyway
			# This was changed in August 2004
			$s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail;

			# Register link in the output object.
			# Replace unnecessary URL escape codes with the referenced character
			# This prevents spammers from hiding links from the filters
			$pasteurized = Parser::replaceUnusualEscapes( $url );
			$this->mOutput->addExternalLink( $pasteurized );
		}

		wfProfileOut( $fname );
		return $s;
	}

	/**
	 * Replace anything that looks like a URL with a link
	 * @private
	 */
	function replaceFreeExternalLinks( $text ) {
		global $wgContLang;
		$fname = 'Parser::replaceFreeExternalLinks';
		wfProfileIn( $fname );

		$bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		$s = array_shift( $bits );
		$i = 0;

		$sk =& $this->mOptions->getSkin();

		while ( $i < count( $bits ) ){
			$protocol = $bits[$i++];
			$remainder = $bits[$i++];

			if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
				# Found some characters after the protocol that look promising
				$url = $protocol . $m[1];
				$trail = $m[2];

				# special case: handle urls as url args:
				# http://www.example.com/foo?=http://www.example.com/bar
				if(strlen($trail) == 0 &&
					isset($bits[$i]) &&
					preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) &&
					preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
				{
					# add protocol, arg
					$url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?