parser.php
来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页
PHP
2,095 行
}

		$marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
		$stripped .= $marker;

		if ( $close === '/>' ) {
			// Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if( $element == '!--' ) {
				$end = '/(-->)/';
			} else {
				$end = "/(<\\/$element\\s*>)/i";
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $q[1];
				$text = $q[2];
			}
		}

		$matches[$marker] = array( $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" );
	}
	return $stripped;
}

/**
 * Strips and renders nowiki, pre, math, hiero
 * If $render is set, performs necessary rendering operations on plugins
 * Returns the text, and fills an array with data needed in unstrip()
 * If the $state is already a valid strip state, it adds to the state
 *
 * Each extracted tag is replaced in the text by a unique marker; the
 * marker => output pairs are stored in $state, grouped by lower-cased
 * tag name, so that unstrip() and unstripNoWiki() can tell the
 * 'nowiki' and 'html' groups apart from everything else.
 *
 * @param string $text wikitext to process
 * @param array  &$state strip state, filled as $state[tag][marker] = output
 * @param bool $stripcomments when set, HTML comments <!-- like this -->
 *  will be stripped in addition to other tags. This is important
 *  for section editing, where these comments cause confusion when
 *  counting the sections in the wikisource
 *
 * @param array $dontstrip contains tags which should not be stripped;
 *  used to prevent stripping of <gallery> when saving (fixes bug 2700)
 *
 * @return string the text with the extracted tags replaced by markers
 * @throws MWException when rendering a tag that has no registered hook
 *
 * @private
 */
function strip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
	$render = ($this->mOutputType == OT_HTML);

	# Replace any instances of the placeholders
	$uniq_prefix = $this->mUniqPrefix;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
	$commentState = array();

	$elements = array_merge(
		array( 'nowiki', 'gallery' ),
		array_keys( $this->mTagHooks ) );
	global $wgRawHtml;
	if( $wgRawHtml ) {
		$elements[] = 'html';
	}
	if( $this->mOptions->getUseTeX() ) {
		$elements[] = 'math';
	}

	# Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
	foreach ( $elements AS $k => $v ) {
		if ( in_array ( $v , $dontstrip ) ) {
			unset ( $elements[$k] );
		}
	}

	$matches = array();
	$text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );

	foreach( $matches as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;

		# Normalise the tag name once and use it both for dispatch and as
		# the strip-state key. unstrip()/unstripNoWiki() compare the key
		# against the lower-case literals 'nowiki' and 'html', so a state
		# keyed by a mixed-case name (e.g. from <NOWIKI>) would be restored
		# by the wrong pass.
		# NOTE(review): presumably $element keeps the case used in the
		# source text -- confirm against extractTagsAndParams().
		$tagName = strtolower( $element );

		if( $render ) {
			switch( $tagName ) {
			case '!--':
				// Comment
				if( substr( $tag, -3 ) == '-->' ) {
					$output = $tag;
				} else {
					// Unclosed comment in input.
					// Close it so later stripping can remove it
					$output = "$tag-->";
				}
				break;
			case 'html':
				if( $wgRawHtml ) {
					$output = $content;
					break;
				}
				// Shouldn't happen otherwise. :)
				// Deliberate fall-through: treat as nowiki.
			case 'nowiki':
				$output = wfEscapeHTMLTagsOnly( $content );
				break;
			case 'math':
				$output = MathRenderer::renderMath( $content );
				break;
			case 'gallery':
				$output = $this->renderImageGallery( $content, $params );
				break;
			default:
				if( isset( $this->mTagHooks[$tagName] ) ) {
					$output = call_user_func_array( $this->mTagHooks[$tagName],
						array( $content, $params, $this ) );
				} else {
					throw new MWException( "Invalid call hook $element" );
				}
			}
		} else {
			// Just stripping tags; keep the source
			$output = $tag;
		}

		if( !$stripcomments && $tagName == '!--' ) {
			$commentState[$marker] = $output;
		} else {
			$state[$tagName][$marker] = $output;
		}
	}

	# Unstrip comments unless explicitly told otherwise.
	# (The comments are always stripped prior to this point, so as to
	# not invoke any extension tags / parser hooks contained within
	# a comment.)
	if ( !$stripcomments ) {
		// Put them all back and forget them
		$text = strtr( $text, $commentState );
	}

	return $text;
}

/**
 * Restores pre, math, and other extensions removed by strip()
 *
 * Replaces every marker stored in $state with its saved output, except
 * for the 'nowiki' and 'html' groups, which are left for unstripNoWiki().
 *
 * always call unstripNoWiki() after this one
 *
 * @param string $text text containing strip markers
 * @param array  &$state strip state as filled by strip(); when it is not
 *  an array the text is returned unchanged
 * @return string
 * @private
 */
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}

	$replacements = array();
	foreach( $state as $tag => $contentDict ) {
		if( $tag != 'nowiki' && $tag != 'html' ) {
			foreach( $contentDict as $uniq => $content ) {
				$replacements[$uniq] = $content;
			}
		}
	}
	$text = strtr( $text, $replacements );

	return $text;
}

/**
 * Always call this after unstrip() to preserve the order
 *
 * Replaces only the markers stored in the 'nowiki' and 'html' groups
 * of $state with their saved output.
 *
 * @param string $text text containing strip markers
 * @param array  &$state strip state as filled by strip(); when it is not
 *  an array the text is returned unchanged
 * @return string
 * @private
 */
function unstripNoWiki( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}

	$replacements = array();
	foreach( $state as $tag => $contentDict ) {
		if( $tag == 'nowiki' || $tag == 'html' ) {
			foreach( $contentDict as $uniq => $content ) {
				$replacements[$uniq] = $content;
			}
		}
	}
	$text = strtr( $text, $replacements );

	return $text;
}

/**
 * Add an item to the strip state
 * Returns the unique tag which must be inserted into the stripped text
 * The tag will be replaced with the original text in unstrip()
*
 * @param string $text content to store under the new marker
 * @param array  &$state strip state; initialised to an array when empty
 * @return string the generated marker
 * @private
 */
function insertStripItem( $text, &$state ) {
	$rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString();
	if ( !$state ) {
		$state = array();
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}

/**
 * Interface with html tidy, used if $wgUseTidy = true.
 * If tidy isn't able to correct the markup, the original will be
 * returned in all its glory with a warning comment appended.
 *
 * Either the external tidy program or the in-process tidy extension
 * will be used depending on availability. Override the default
 * $wgTidyInternal setting to disable the internal if it's not working.
 *
 * @param string $text Hideous HTML input
 * @return string Corrected HTML output
 * @public
 * @static
 */
function tidy( $text ) {
	global $wgTidyInternal;
	# Wrap the fragment in a complete document before handing it to tidy.
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
'<head><title>test</title></head><body>'.$text.'</body></html>';
	if( $wgTidyInternal ) {
		$correctedtext = Parser::internalTidy( $wrappedtext );
	} else {
		$correctedtext = Parser::externalTidy( $wrappedtext );
	}
	if( is_null( $correctedtext ) ) {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	}
	return $correctedtext;
}

/**
 * Spawn an external HTML tidy process and get corrected markup back from it.
 *
 * @param string $text markup to feed to tidy on stdin
 * @return string|null tidy's stdout, or null when nothing came back for
 *  non-empty input
 * @private
 * @static
 */
function externalTidy( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	$fname = 'Parser::externalTidy';
	wfProfileIn( $fname );

	$cleansource = '';
	$opts = ' -utf8';

	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$pipes = array();
	# Quote the config path so that a path containing spaces or shell
	# metacharacters is passed to tidy as a single argument.
	# $wgTidyOpts is left raw: it is a free-form option string.
	$command = "$wgTidyBin -config " . escapeshellarg( $wgTidyConf ) . " $wgTidyOpts$opts";
	$process = proc_open( $command, $descriptorspec, $pipes );
	if (is_resource($process)) {
		// Theoretically, this style of communication could cause a deadlock
		// here. If the stdout buffer fills up, then writes to stdin could
		// block. This doesn't appear to happen with tidy, because tidy only
		// writes to stdout after it's finished reading from stdin. Search
		// for tidyParseStdin and tidySaveStdout in console/tidy.c
		fwrite($pipes[0], $text);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$chunk = fgets($pipes[1], 1024);
			if ( $chunk === false ) {
				// Read error or nothing more to read: stop instead of
				// spinning on a stream that never reports EOF.
				break;
			}
			$cleansource .= $chunk;
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		// Some kind of error happened, so we couldn't get the corrected text.
		// Just give up; we'll use the source text and append a warning.
		return null;
	} else {
		return $cleansource;
	}
}

/**
 * Use the HTML tidy PECL extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * Uses the object interface of the tidy extension (tidy 2.0, PHP 5);
 * the procedural tidy 1.x calls used previously (tidy_load_config(),
 * tidy_set_encoding(), ...) are not available there.
 *
 * 'pear install tidy' should be able to compile the extension module.
 *
 * @param string $text markup to clean up
 * @return string|null repaired markup, or null when tidy reports a
 *  fatal error
 * @private
 * @static
 */
function internalTidy( $text ) {
	global $wgTidyConf;
	$fname = 'Parser::internalTidy';
	wfProfileIn( $fname );

	$tidy = new tidy;
	$tidy->parseString( $text, $wgTidyConf, 'utf8' );
	$tidy->cleanRepair();
	if( $tidy->getStatus() == 2 ) {
		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		$cleansource = null;
	} else {
		$cleansource = tidy_get_output( $tidy );
	}
	wfProfileOut( $fname );
	return $cleansource;
}

/**
 * parse the wiki syntax used to render tables
 *
 * @private
 */
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$has_opened_tr = array(); # Did this table open a <tr> element?
$indent_level = 0; # indent level of the table foreach ( $t AS $k => $x ) { $x = trim ( $x ) ; $fc = substr ( $x , 0 , 1 ) ; if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) { $indent_level = strlen( $matches[1] ); $attributes = $this->unstripForHTML( $matches[2] ); $t[$k] = str_repeat( '<dl><dd>', $indent_level ) . '<table' . Sanitizer::fixTagAttributes ( $attributes, 'table' ) . '>' ; array_push ( $td , false ) ; array_push ( $ltd , '' ) ; array_push ( $tr , false ) ; array_push ( $ltr , '' ) ; array_push ( $has_opened_tr, false ); } else if ( count ( $td ) == 0 ) { } # Don't do any of the following else if ( '|}' == substr ( $x , 0 , 2 ) ) { $z = "</table>" . substr ( $x , 2); $l = array_pop ( $ltd ) ; if ( !array_pop ( $has_opened_tr ) ) $z = "<tr><td></td></tr>" . $z ; if ( array_pop ( $tr ) ) $z = '</tr>' . $z ; if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ; array_pop ( $ltr ) ; $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level ); } else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |--------------- $x = substr ( $x , 1 ) ; while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ; $z = '' ; $l = array_pop ( $ltd ) ; array_pop ( $has_opened_tr ); array_push ( $has_opened_tr , true ) ; if ( array_pop ( $tr ) ) $z = '</tr>' . $z ; if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ; array_pop ( $ltr ) ; $t[$k] = $z ; array_push ( $tr , false ) ; array_push ( $td , false ) ; array_push ( $ltd , '' ) ; $attributes = $this->unstripForHTML( $x ); array_push ( $ltr , Sanitizer::fixTagAttributes ( $attributes, 'tr' ) ) ; } else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption # $x is a table row if ( '|+' == substr ( $x , 0 , 2 ) ) { $fc = '+' ; $x = substr ( $x , 1 ) ; } $after = substr ( $x , 1 ) ; if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ; // Split up multiple cells on the same line. 
// FIXME: This can result in improper nesting of tags processed // by earlier parser steps, but should avoid splitting up eg // attribute values containing literal "||". $after = wfExplodeMarkup( '||', $after ); $t[$k] = '' ; # Loop through each table cell foreach ( $after AS $theline ) { $z = '' ; if ( $fc != '+' ) { $tra = array_pop ( $ltr ) ; if ( !array_pop ( $tr ) ) $z = '<tr'.$tra.">\n" ; array_push ( $tr , true ) ; array_push ( $ltr , '' ) ; array_pop ( $has_opened_tr ); array_push ( $has_opened_tr , true ) ; } $l = array_pop ( $ltd ) ; if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ; if ( $fc == '|' ) $l = 'td' ; else if ( $fc == '!' ) $l = 'th' ; else if ( $fc == '+' ) $l = 'caption' ; else $l = '' ; array_push ( $ltd , $l ) ; # Cell parameters $y = explode ( '|' , $theline , 2 ) ; # Note that a '|' inside an invalid link should not # be mistaken as delimiting cell parameters if ( strpos( $y[0], '[[' ) !== false ) { $y = array ($theline); } if ( count ( $y ) == 1 )
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?