sanitizer.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 1,185 行 · 第 1/3 页

PHP
1,185
字号
			# Single-quoted
			return $set[4];
		} elseif( isset( $set[3] ) ) {
			# Double-quoted
			return $set[3];
		} elseif( !isset( $set[2] ) ) {
			# In XHTML, attributes must have a value.
			# For 'reduced' form, return explicitly the attribute name here.
			return $set[1];
		} else {
			throw new MWException( "Tag conditions not met. This should never happen and is a bug." );
		}
	}

	/**
	 * Normalize whitespace and character references in an XML source-
	 * encoded text for an attribute value.
	 *
	 * See http://www.w3.org/TR/REC-xml/#AVNormalize for background,
	 * but note that we're not returning the value, but are returning
	 * XML source fragments that will be slapped into output.
	 *
	 * Character references are normalized first, then every CRLF pair or
	 * single space/CR/LF/tab becomes one space, and finally literal
	 * double quotes are turned into &quot;.
	 *
	 * @param string $text
	 * @return string
	 * @private Internal by convention only; visibility stays public, as
	 *          it was implicitly before, so existing callers keep working.
	 * @static
	 */
	public static function normalizeAttributeValue( $text ) {
		return str_replace( '"', '&quot;',
			preg_replace(
				'/\r\n|[\x20\x0d\x0a\x09]/',
				' ',
				Sanitizer::normalizeCharReferences( $text ) ) );
	}

	/**
	 * Ensure that any entities and character references are legal
	 * for XML and XHTML specifically. Any stray bits will be
	 * &amp;-escaped to result in a valid text fragment.
	 *
	 * a. any named char refs must be known in XHTML
	 * b. any numeric char refs must be legal chars, not invalid or forbidden
	 * c. use &#x, not &#X
	 * d. fix or reject non-valid attributes
	 *
	 * @param string $text
	 * @return string
	 * @private Internal by convention only; visibility stays public, as
	 *          it was implicitly before, so existing callers keep working.
	 * @static
	 */
	public static function normalizeCharReferences( $text ) {
		return preg_replace_callback(
			MW_CHAR_REFS_REGEX,
			array( 'Sanitizer', 'normalizeCharReferencesCallback' ),
			$text );
	}

	/**
	 * preg_replace_callback() handler for normalizeCharReferences().
	 *
	 * The first non-empty capture group decides the handling: group 1 is
	 * passed to normalizeEntity(), group 2 to decCharReference(), and
	 * groups 3 and 4 to hexCharReference(). When no group applies, or the
	 * handler rejects the reference (returns null), the whole match is
	 * HTML-escaped instead.
	 *
	 * @param array $matches Match array supplied by preg_replace_callback()
	 * @return string
	 * @static
	 */
	public static function normalizeCharReferencesCallback( $matches ) {
		$ret = null;
		# preg_replace_callback() drops trailing groups that did not
		# participate in the match, hence the isset() guards.
		if( isset( $matches[1] ) && $matches[1] !== '' ) {
			$ret = Sanitizer::normalizeEntity( $matches[1] );
		} elseif( isset( $matches[2] ) && $matches[2] !== '' ) {
			$ret = Sanitizer::decCharReference( $matches[2] );
		} elseif( isset( $matches[3] ) && $matches[3] !== '' ) {
			$ret = Sanitizer::hexCharReference( $matches[3] );
		} elseif( isset( $matches[4] ) && $matches[4] !== '' ) {
			$ret = Sanitizer::hexCharReference( $matches[4] );
		}
		if( $ret === null ) {
			return htmlspecialchars( $matches[0] );
		}
		return $ret;
	}

	/**
	 * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
	 * return the named entity reference as is. Otherwise, returns
	 * HTML-escaped text of pseudo-entity source (eg &amp;foo;)
	 *
	 * "Defined" means the name is a key of the global $wgHtmlEntities.
	 *
	 * @param string $name
	 * @return string
	 * @static
	 */
	public static function normalizeEntity( $name ) {
		global $wgHtmlEntities;
		if( isset( $wgHtmlEntities[$name] ) ) {
			return "&$name;";
		}
		return "&amp;$name;";
	}

	/**
	 * Build a normalized decimal character reference (&#NNN;).
	 *
	 * @param string $codepoint Decimal digits of the reference
	 * @return string|null The reference, or null when the code point
	 *         fails validateCodepoint()
	 * @static
	 */
	public static function decCharReference( $codepoint ) {
		$point = intval( $codepoint );
		if( Sanitizer::validateCodepoint( $point ) ) {
			return sprintf( '&#%d;', $point );
		}
		return null;
	}

	/**
	 * Build a normalized hexadecimal character reference (&#xhhh;,
	 * lower-case "x" and lower-case digits).
	 *
	 * @param string $codepoint Hexadecimal digits of the reference
	 * @return string|null The reference, or null when the code point
	 *         fails validateCodepoint()
	 * @static
	 */
	public static function hexCharReference( $codepoint ) {
		$point = hexdec( $codepoint );
		if( Sanitizer::validateCodepoint( $point ) ) {
			return sprintf( '&#x%x;', $point );
		}
		return null;
	}

	/**
	 * Returns true if a given Unicode codepoint is a valid character in XML.
	 *
	 * Accepted: tab, LF, CR, U+0020..U+D7FF, U+E000..U+FFFD and
	 * U+10000..U+10FFFF.
	 *
	 * @param int $codepoint
	 * @return bool
	 * @static
	 */
	public static function validateCodepoint( $codepoint ) {
		return ($codepoint ==    0x09)
			|| ($codepoint ==    0x0a)
			|| ($codepoint ==    0x0d)
			|| ($codepoint >=    0x20 && $codepoint <=   0xd7ff)
			|| ($codepoint >=  0xe000 && $codepoint <=   0xfffd)
			|| ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
	}

	/**
	 * Decode any character references, numeric or named entities,
	 * in the text and return a UTF-8 string.
	 *
	 * @param string $text
	 * @return string
	 * @public
	 * @static
	 */
	public static function decodeCharReferences( $text ) {
		return preg_replace_callback(
			MW_CHAR_REFS_REGEX,
			array( 'Sanitizer', 'decodeCharReferencesCallback' ),
			$text );
	}

	/**
	 * preg_replace_callback() handler for decodeCharReferences().
	 *
	 * Group 1 is decoded through decodeEntity(); group 2 is read as a
	 * decimal and groups 3 and 4 as hexadecimal code points, which are
	 * decoded through decodeChar().
	 *
	 * @param array $matches Match array supplied by preg_replace_callback()
	 * @return string
	 * @static
	 */
	public static function decodeCharReferencesCallback( $matches ) {
		# preg_replace_callback() drops trailing groups that did not
		# participate in the match, hence the isset() guards.
		if( isset( $matches[1] ) && $matches[1] !== '' ) {
			return Sanitizer::decodeEntity( $matches[1] );
		} elseif( isset( $matches[2] ) && $matches[2] !== '' ) {
			return Sanitizer::decodeChar( intval( $matches[2] ) );
		} elseif( isset( $matches[3] ) && $matches[3] !== '' ) {
			return Sanitizer::decodeChar( hexdec( $matches[3] ) );
		} elseif( isset( $matches[4] ) && $matches[4] !== '' ) {
			return Sanitizer::decodeChar( hexdec( $matches[4] ) );
		}
		# Last case should be an ampersand by itself
		return $matches[0];
	}

	/**
	 * Return UTF-8 string for a codepoint if that is a valid
	 * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
	 *
	 * @param int $codepoint
	 * @return string
	 * @private Internal by convention only; visibility stays public, as
	 *          it was implicitly before, so existing callers keep working.
	 * @static
	 */
	public static function decodeChar( $codepoint ) {
		if( Sanitizer::validateCodepoint( $codepoint ) ) {
			return codepointToUtf8( $codepoint );
		}
		return UTF8_REPLACEMENT;
	}

	/**
	 * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
	 * return the UTF-8 encoding of that character. Otherwise, returns
	 * pseudo-entity source (eg &foo;)
	 *
	 * The code point is looked up in the global $wgHtmlEntities.
	 *
	 * @param string $name
	 * @return string
	 * @static
	 */
	public static function decodeEntity( $name ) {
		global $wgHtmlEntities;
		if( isset( $wgHtmlEntities[$name] ) ) {
			return codepointToUtf8( $wgHtmlEntities[$name] );
		}
		return "&$name;";
	}

	/**
	 * Fetch the whitelist of acceptable attributes for a given
	 * element name.
	 *
	 * The full table is built once by setupAttributeWhitelist() and kept
	 * in a function-static variable for later calls.
	 *
	 * @param string $element
	 * @return array Attribute names; empty array for an unlisted element
	 * @static
	 */
	public static function attributeWhitelist( $element ) {
		static $list;
		if( !isset( $list ) ) {
			$list = Sanitizer::setupAttributeWhitelist();
		}
		return isset( $list[$element] )
			? $list[$element]
			: array();
	}

	/**
	 * Build the map of element name => list of permitted attribute names.
	 *
	 * @return array
	 * @static
	 */
	public static function setupAttributeWhitelist() {
		$common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
		$block = array_merge( $common, array( 'align' ) );
		$tablealign = array( 'align', 'char', 'charoff', 'valign' );
		$tablecell = array( 'abbr',
		                    'axis',
		                    'headers',
		                    'scope',
		                    'rowspan',
		                    'colspan',
		                    'nowrap', # deprecated
		                    'width',  # deprecated
		                    'height', # deprecated
		                    'bgcolor' # deprecated
		                    );

		# Numbers refer to sections in HTML 4.01 standard describing the element.
		# See: http://www.w3.org/TR/html4/
		$whitelist = array (
			# 7.5.4
			'div'        => $block,
			'center'     => $common, # deprecated
			'span'       => $block, # ??

			# 7.5.5
			'h1'         => $block,
			'h2'         => $block,
			'h3'         => $block,
			'h4'         => $block,
			'h5'         => $block,
			'h6'         => $block,

			# 7.5.6
			# address

			# 8.2.4
			# bdo

			# 9.2.1
			'em'         => $common,
			'strong'     => $common,
			'cite'       => $common,
			# dfn
			'code'       => $common,
			# samp
			# kbd
			'var'        => $common,
			# abbr
			# acronym

			# 9.2.2
			'blockquote' => array_merge( $common, array( 'cite' ) ),
			# q

			# 9.2.3
			'sub'        => $common,
			'sup'        => $common,

			# 9.3.1
			'p'          => $block,

			# 9.3.2
			'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),

			# 9.3.4
			'pre'        => array_merge( $common, array( 'width' ) ),

			# 9.4
			'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
			'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),

			# 10.2
			'ul'         => array_merge( $common, array( 'type' ) ),
			'ol'         => array_merge( $common, array( 'type', 'start' ) ),
			'li'         => array_merge( $common, array( 'type', 'value' ) ),

			# 10.3
			'dl'         => $common,
			'dd'         => $common,
			'dt'         => $common,

			# 11.2.1
			# (each attribute listed once; 'frame', 'rules' and 'border'
			# used to be repeated at the end of this list)
			'table'      => array_merge( $common,
								array( 'summary', 'width', 'border', 'frame',
											 'rules', 'cellspacing', 'cellpadding',
											 'align', 'bgcolor' ) ),

			# 11.2.2
			'caption'    => array_merge( $common, array( 'align' ) ),

			# 11.2.3
			'thead'      => array_merge( $common, $tablealign ),
			'tfoot'      => array_merge( $common, $tablealign ),
			'tbody'      => array_merge( $common, $tablealign ),

			# 11.2.4
			'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
			'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),

			# 11.2.5
			'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),

			# 11.2.6
			'td'         => array_merge( $common, $tablecell, $tablealign ),
			'th'         => array_merge( $common, $tablecell, $tablealign ),

			# 15.2.1
			'tt'         => $common,
			'b'          => $common,
			'i'          => $common,
			'big'        => $common,
			'small'      => $common,
			'strike'     => $common,
			's'          => $common,
			'u'          => $common,

			# 15.2.2
			'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
			# basefont

			# 15.3
			'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),

			# XHTML Ruby annotation text module, simple ruby only.
			# http://www.w3c.org/TR/ruby/
			'ruby'       => $common,
			# rbc
			# rtc
			'rb'         => $common,
			'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
			'rp'         => $common,
			);
		return $whitelist;
	}

	/**
	 * Take a fragment of (potentially invalid) HTML and return
	 * a version with any tags removed, encoded suitably for literal
	 * inclusion in an attribute value.
	 *
	 * @param string $text HTML fragment
	 * @return string
	 * @static
	 */
	public static function stripAllTags( $text ) {
		# Actual <tags>
		# The 's' modifier lets '.' match newlines, so a tag whose
		# attributes wrap onto several lines is removed as well.
		$text = preg_replace( '/ < .*? > /xs', '', $text );

		# Normalize &entities and whitespace
		$text = Sanitizer::normalizeAttributeValue( $text );

		# Will be placed into "double-quoted" attributes,
		# make sure remaining bits are safe.
		$text = str_replace(
			array('<', '>', '"'),
			array('&lt;', '&gt;', '&quot;'),
			$text );

		return $text;
	}

	/**
	 * Hack up a private DOCTYPE with HTML's standard entity declarations.
	 * PHP 4 seemed to know these if you gave it an HTML doctype, but
	 * PHP 5.1 doesn't.
	 *
	 * Use for passing XHTML fragments to PHP's XML parsing functions
	 *
	 * One <!ENTITY> declaration is emitted per entry of the global
	 * $wgHtmlEntities, mapping the name to a numeric character reference.
	 *
	 * @return string
	 * @static
	 */
	public static function hackDocType() {
		global $wgHtmlEntities;
		$out = "<!DOCTYPE html [\n";
		foreach( $wgHtmlEntities as $entity => $codepoint ) {
			$out .= "<!ENTITY $entity \"&#$codepoint;\">";
		}
		$out .= "]>\n";
		return $out;
	}
}
?>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?