parser.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页

PHP
2,095
字号
			}			$marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';			$stripped .= $marker;			if ( $close === '/>' ) {				// Empty element tag, <tag />				$content = null;				$text = $inside;				$tail = null;			} else {				if( $element == '!--' ) {					$end = '/(-->)/';				} else {					$end = "/(<\\/$element\\s*>)/i";				}				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );				$content = $q[0];				if( count( $q ) < 3 ) {					# No end tag -- let it run out to the end of the text.					$tail = '';					$text = '';				} else {					$tail = $q[1];					$text = $q[2];				}			}						$matches[$marker] = array( $element,				$content,				Sanitizer::decodeTagAttributes( $attributes ),				"<$element$attributes$close$content$tail" );		}		return $stripped;	}	/**	 * Strips and renders nowiki, pre, math, hiero	 * If $render is set, performs necessary rendering operations on plugins	 * Returns the text, and fills an array with data needed in unstrip()	 * If the $state is already a valid strip state, it adds to the state	 *	 * @param bool $stripcomments when set, HTML comments <!-- like this -->	 *  will be stripped in addition to other tags. 
This is important
	 *  for section editing, where these comments cause confusion when
	 *  counting the sections in the wikisource
	 *
	 * @param array $dontstrip contains tags which should not be stripped;
	 *  used to prevent stripping of <gallery> when saving (fixes bug 2700)
	 *
	 * Stripped output is stored in $state keyed by the lower-cased tag name,
	 * so that unstrip() / unstripNoWiki(), which compare the key against the
	 * literal strings 'nowiki' and 'html', treat <NoWiki> the same as <nowiki>.
	 *
	 * @return string the text with every matched element replaced by its marker
	 *  (comments are put back unless $stripcomments is set)
	 *
	 * @private
	 */
	function strip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
		global $wgRawHtml;

		$render = ($this->mOutputType == OT_HTML);

		# Replace any instances of the placeholders
		$uniq_prefix = $this->mUniqPrefix;
		#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
		$commentState = array();

		$elements = array_merge(
			array( 'nowiki', 'gallery' ),
			array_keys( $this->mTagHooks ) );
		if( $wgRawHtml ) {
			$elements[] = 'html';
		}
		if( $this->mOptions->getUseTeX() ) {
			$elements[] = 'math';
		}

		# Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
		foreach ( $elements AS $k => $v ) {
			if ( !in_array ( $v , $dontstrip ) ) continue;
			unset ( $elements[$k] );
		}

		$matches = array();
		$text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );

		foreach( $matches as $marker => $data ) {
			list( $element, $content, $params, $tag ) = $data;

			# Normalise once: the same lower-cased name selects the renderer
			# below and keys the strip state, so the two can never disagree.
			$tagName = strtolower( $element );

			if( $render ) {
				switch( $tagName ) {
				case '!--':
					// Comment
					if( substr( $tag, -3 ) == '-->' ) {
						$output = $tag;
					} else {
						// Unclosed comment in input.
						// Close it so later stripping can remove it
						$output = "$tag-->";
					}
					break;
				case 'html':
					if( $wgRawHtml ) {
						$output = $content;
						break;
					}
					// Shouldn't happen otherwise. :)
					// Deliberate fall-through: without $wgRawHtml the
					// content is escaped exactly like <nowiki>.
				case 'nowiki':
					$output = wfEscapeHTMLTagsOnly( $content );
					break;
				case 'math':
					$output = MathRenderer::renderMath( $content );
					break;
				case 'gallery':
					$output = $this->renderImageGallery( $content, $params );
					break;
				default:
					if( isset( $this->mTagHooks[$tagName] ) ) {
						$output = call_user_func_array( $this->mTagHooks[$tagName],
							array( $content, $params, $this ) );
					} else {
						throw new MWException( "Invalid call hook $element" );
					}
				}
			} else {
				// Just stripping tags; keep the source
				$output = $tag;
			}

			if( !$stripcomments && $element == '!--' ) {
				$commentState[$marker] = $output;
			} else {
				$state[$tagName][$marker] = $output;
			}
		}

		# Unstrip comments unless explicitly told otherwise.
		# (The comments are always stripped prior to this point, so as to
		# not invoke any extension tags / parser hooks contained within
		# a comment.)
		if ( !$stripcomments ) {
			// Put them all back and forget them
			$text = strtr( $text, $commentState );
		}

		return $text;
	}

	/**
	 * Restores pre, math, and other extensions removed by strip()
	 *
	 * Every marker stored under a state key other than 'nowiki' and 'html'
	 * is replaced by its saved output. If $state is not an array the text
	 * is returned unchanged.
	 *
	 * always call unstripNoWiki() after this one
	 *
	 * @param string $text text containing strip markers
	 * @param array $state strip state filled in by strip()
	 * @return string
	 * @private
	 */
	function unstrip( $text, &$state ) {
		if ( !is_array( $state ) ) {
			return $text;
		}

		$replacements = array();
		foreach( $state as $tag => $contentDict ) {
			if( $tag != 'nowiki' && $tag != 'html' ) {
				foreach( $contentDict as $uniq => $content ) {
					$replacements[$uniq] = $content;
				}
			}
		}
		$text = strtr( $text, $replacements );

		return $text;
	}

	/**
	 * Always call this after unstrip() to preserve the order
	 *
	 * Replaces only the markers stored under the 'nowiki' and 'html' state
	 * keys. If $state is not an array the text is returned unchanged.
	 *
	 * @param string $text text containing strip markers
	 * @param array $state strip state filled in by strip()
	 * @return string
	 * @private
	 */
	function unstripNoWiki( $text, &$state ) {
		if ( !is_array( $state ) ) {
			return $text;
		}

		$replacements = array();
		foreach( $state as $tag => $contentDict ) {
			if( $tag == 'nowiki' || $tag == 'html' ) {
				foreach( $contentDict as $uniq => $content ) {
					$replacements[$uniq] = $content;
				}
			}
		}
		$text = strtr( $text, $replacements );

		return $text;
	}

	/**
	 * Add an
item to the strip state
	 *
	 * The text is saved under the 'item' key of $state and a freshly
	 * generated marker is handed back; that marker must be inserted into
	 * the stripped text and is swapped for the original text in unstrip().
	 * A falsy $state is initialised to an empty array first.
	 *
	 * @param string $text text to stash away
	 * @param array $state strip state, modified in place
	 * @return string the unique marker
	 * @private
	 */
	function insertStripItem( $text, &$state ) {
		if ( !$state ) {
			$state = array();
		}

		$marker = $this->mUniqPrefix . '-item' . Parser::getRandomString();
		$state['item'][$marker] = $text;

		return $marker;
	}

	/**
	 * Interface with html tidy, used if $wgUseTidy = true.
	 *
	 * The fragment is wrapped in a minimal XHTML 1.0 Transitional document
	 * and passed to internalTidy() when $wgTidyInternal is set, otherwise to
	 * externalTidy(). When the chosen backend returns null, the original
	 * text is returned with a warning comment appended.
	 *
	 * @param string $text Hideous HTML input
	 * @return string Corrected HTML output
	 * @public
	 * @static
	 */
	function tidy( $text ) {
		global $wgTidyInternal;

		$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
			' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
			'<head><title>test</title></head><body>'.$text.'</body></html>';

		$correctedtext = $wgTidyInternal
			? Parser::internalTidy( $wrappedtext )
			: Parser::externalTidy( $wrappedtext );

		if( !is_null( $correctedtext ) ) {
			return $correctedtext;
		}

		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	}

	/**
	 * Spawn an external HTML tidy process and get corrected markup back from it.
*
	 * The markup is written to the child's stdin and the corrected markup
	 * is collected from its stdout; stderr is appended to /dev/null.
	 *
	 * @param string $text markup to clean up
	 * @return string|null the child's output, or null when nothing came back
	 *  for a non-empty input
	 * @private
	 * @static
	 */
	function externalTidy( $text ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		$fname = 'Parser::externalTidy';
		wfProfileIn( $fname );

		$opts = ' -utf8';
		$descriptorspec = array(
			0 => array('pipe', 'r'),
			1 => array('pipe', 'w'),
			2 => array('file', '/dev/null', 'a')
		);
		$pipes = array();
		$cleansource = '';

		$process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
		if (is_resource($process)) {
			// Theoretically, this style of communication could cause a deadlock
			// here. If the stdout buffer fills up, then writes to stdin could
			// block. This doesn't appear to happen with tidy, because tidy only
			// writes to stdout after it's finished reading from stdin. Search
			// for tidyParseStdin and tidySaveStdout in console/tidy.c
			$stdin = $pipes[0];
			$stdout = $pipes[1];

			fwrite($stdin, $text);
			fclose($stdin);

			while (!feof($stdout)) {
				$cleansource .= fgets($stdout, 1024);
			}
			fclose($stdout);

			proc_close($process);
		}

		wfProfileOut( $fname );

		// Some kind of error happened if we got nothing back for real input;
		// give up so the caller can use the source text and append a warning.
		$failed = ( $cleansource == '' && $text != '' );
		return $failed ? null : $cleansource;
	}

	/**
	 * Use the HTML tidy PECL extension to use the tidy library in-process,
	 * saving the overhead of spawning a new process. Currently written to
	 * the PHP 4.3.x version of the extension, may not work on PHP 5.
	 *
	 * 'pear install tidy' should be able to compile the extension module.
*
	 * @param string $text markup to clean up
	 * @return string|null the repaired markup, or null when tidy reports
	 *  status 2
	 * @private
	 * @static
	 */
	function internalTidy( $text ) {
		global $wgTidyConf;
		$fname = 'Parser::internalTidy';
		wfProfileIn( $fname );

		tidy_load_config( $wgTidyConf );
		tidy_set_encoding( 'utf8' );
		tidy_parse_string( $text );
		tidy_clean_repair();

		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		$cleansource = ( tidy_get_status() == 2 ) ? null : tidy_get_output();

		wfProfileOut( $fname );
		return $cleansource;
	}

	/**
	 * parse the wiki syntax used to render tables
	 *
	 * @private
	 */
	function doTableStuff ( $t ) {
		$fname = 'Parser::doTableStuff';
		wfProfileIn( $fname );
		$t = explode ( "\n" , $t ) ;
		$td = array () ; # Is currently a td tag open?
		$ltd = array () ; # Was it TD or TH?
		$tr = array () ; # Is currently a tr tag open?
		$ltr = array () ; # tr attributes
		$has_opened_tr = array(); # Did this table open a <tr> element?
		$indent_level = 0; # indent level of the table
		foreach ( $t AS $k => $x )
		{
			$x = trim ( $x ) ;
			$fc = substr ( $x , 0 , 1 ) ;
			if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
				$indent_level = strlen( $matches[1] );
				$attributes = $this->unstripForHTML( $matches[2] );
				$t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
					'<table' . Sanitizer::fixTagAttributes ( $attributes, 'table' ) . '>' ;
				array_push ( $td , false ) ;
				array_push ( $ltd , '' ) ;
				array_push ( $tr , false ) ;
				array_push ( $ltr , '' ) ;
				array_push ( $has_opened_tr, false );
			}
			else if ( count ( $td ) == 0 ) { } # Don't do any of the following
			else if ( '|}' == substr ( $x , 0 , 2 ) ) {
				$z = "</table>" . substr ( $x , 2);
				$l = array_pop ( $ltd ) ;
				if ( !array_pop ( $has_opened_tr ) ) $z = "<tr><td></td></tr>" . $z ;
				if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				array_pop ( $ltr ) ;
				$t[$k] = $z .
str_repeat( '</dd></dl>', $indent_level );			}			else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------				$x = substr ( $x , 1 ) ;				while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;				$z = '' ;				$l = array_pop ( $ltd ) ;				array_pop ( $has_opened_tr );				array_push ( $has_opened_tr , true ) ;				if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;				array_pop ( $ltr ) ;				$t[$k] = $z ;				array_push ( $tr , false ) ;				array_push ( $td , false ) ;				array_push ( $ltd , '' ) ;				$attributes = $this->unstripForHTML( $x );				array_push ( $ltr , Sanitizer::fixTagAttributes ( $attributes, 'tr' ) ) ;			}			else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption				# $x is a table row				if ( '|+' == substr ( $x , 0 , 2 ) ) {					$fc = '+' ;					$x = substr ( $x , 1 ) ;				}				$after = substr ( $x , 1 ) ;				if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;								// Split up multiple cells on the same line.				// FIXME: This can result in improper nesting of tags processed				// by earlier parser steps, but should avoid splitting up eg				// attribute values containing literal "||".				$after = wfExplodeMarkup( '||', $after );								$t[$k] = '' ;				# Loop through each table cell				foreach ( $after AS $theline )				{					$z = '' ;					if ( $fc != '+' )					{						$tra = array_pop ( $ltr ) ;						if ( !array_pop ( $tr ) ) $z = '<tr'.$tra.">\n" ;						array_push ( $tr , true ) ;						array_push ( $ltr , '' ) ;						array_pop ( $has_opened_tr );						array_push ( $has_opened_tr , true ) ;					}					$l = array_pop ( $ltd ) ;					if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;					if ( $fc == '|' ) $l = 'td' ;					else if ( $fc == '!' 
) $l = 'th' ;					else if ( $fc == '+' ) $l = 'caption' ;					else $l = '' ;					array_push ( $ltd , $l ) ;					# Cell parameters					$y = explode ( '|' , $theline , 2 ) ;					# Note that a '|' inside an invalid link should not					# be mistaken as delimiting cell parameters					if ( strpos( $y[0], '[[' ) !== false ) {						$y = array ($theline);					}					if ( count ( $y ) == 1 )

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?