📄 php4.class.kses.php

📁 完美的在线教育系统
💻 PHP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
			 *	This method does a lot of work. It parses an attribute list into an array
			 *	with attribute data, and tries to do the right thing even if it gets weird
			 *	input. It will add quotes around attribute values that don't have any quotes
			 *	or apostrophes around them, to make it easier to produce HTML code that will
			 *	conform to W3C's HTML specification. It will also remove bad URL protocols
			 *	from attribute values.
			 *
			 *	@access private
			 *	@param string $attr Text containing tag attributes for parsing
			 *	@return array Associative array containing data on attribute and value
			 *	@since PHP4 OOP 0.0.1
			 */
			function _hair($attr)
			{
				# Parser state: 0 = expecting an attribute name, 1 = expecting "=" or
				# the whitespace that ends a valueless attribute, 2 = expecting a value.
				$mode     = 0;
				$attrname = '';
				$attrarr  = array();

				# Accepted value syntaxes in the order they are tried, each mapped to
				# the quote character used when the attribute is written back out.
				# Bare values are re-emitted in double quotes to conform to W3C's
				# HTML spec.
				$valueforms = array(
					'/^"([^"]*)"(\s+|$)/'   => '"',
					"/^'([^']*)'(\s+|$)/"   => "'",
					"%^([^\s\"']+)(\s+|$)%" => '"'
				);

				# Consume the attribute list from the left until nothing remains
				while (strlen($attr) != 0)
				{
					# Becomes 1 once a rule below has consumed some input
					$working = 0;

					if ($mode == 0)
					{
						# attribute name, href for instance
						if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
						{
							$attrname = $match[1];
							$attr     = preg_replace('/^[-a-zA-Z]+/', '', $attr);
							$mode     = 1;
							$working  = 1;
						}
					}
					elseif ($mode == 1)
					{
						if (preg_match('/^\s*=\s*/', $attr))
						{
							# equals sign: a value has to follow
							$attr    = preg_replace('/^\s*=\s*/', '', $attr);
							$mode    = 2;
							$working = 1;
						}
						elseif (preg_match('/^\s+/', $attr))
						{
							# whitespace: the attribute is valueless ("selected")
							$attrarr[] = array(
								'name'  => $attrname,
								'value' => '',
								'whole' => $attrname,
								'vless' => 'y'
							);
							$attr    = preg_replace('/^\s+/', '', $attr);
							$mode    = 0;
							$working = 1;
						}
					}
					elseif ($mode == 2)
					{
						# attribute value, a URL after href= for instance
						foreach ($valueforms as $pattern => $quote)
						{
							if (!preg_match($pattern, $attr, $match))
							{
								continue;
							}

							# The value is passed through the protocol filter before it is stored
							$thisval   = $this->_bad_protocol($match[1]);
							$attrarr[] = array(
								'name'  => $attrname,
								'value' => $thisval,
								'whole' => $attrname.'='.$quote.$thisval.$quote,
								'vless' => 'n'
							);
							$attr    = preg_replace($pattern, '', $attr);
							$mode    = 0;
							$working = 1;
							break;
						}
					}

					if ($working == 0)
					{
						# not well formed: drop the offending piece and start over
						# with a fresh attribute name
						$attr = $this->_html_error($attr);
						$mode = 0;
					}
				}

				# The list ended right after a name, so that last attribute is a
				# valueless one like "selected"
				if ($mode == 1)
				{
					$attrarr[] = array(
						'name'  => $attrname,
						'value' => '',
						'whole' => $attrname,
						'vless' => 'y'
					);
				}

				return $attrarr;
			}

			/**
			 *	This method removes disallowed protocols.
			 *
			 *	This method removes all non-allowed protocols from the beginning of
			 *	$string. It ignores whitespace and the case of the letters, and it does
			 *	understand HTML entities. It does its work in a while loop, so it won't be
			 *	fooled by a string like "javascript:javascript:alert(57)".
			 *
			 *	@access private
			 *	@param string $string String to check for protocols
			 *	@return string String with removed protocols
			 *	@since PHP4 OOP 0.0.1
			 */
			function _bad_protocol($string)
			{
				# Pre-clean the input: _no_null() is defined elsewhere in the class
				# (presumably it strips NUL characters - confirm there), then every
				# run of \xad bytes is removed.
				$string = preg_replace('/\xad+/', '', $this->_no_null($string)); # deals with Opera "feature"

				# Strip one leading protocol per pass and repeat until a pass leaves
				# the string unchanged, so stacked protocols are all removed.
				do
				{
					$before = $string;
					$string = $this->_bad_protocol_once($string);
				}
				while ($string != $before);

				return $string;
			}

			/**
			 *	Helper method used by _bad_protocol()
			 *
			 *	This function searches for URL protocols at the beginning of $string, while
			 *	handling whitespace and HTML entities.
			 *  Function updated to fix security vulnerability (see http://projects.dokeos.com/index.php?do=details&task_id=2312)
			 *
			 *	@access private
			 *	@param string $string String to check for protocols
			 *	@return string String with removed protocols
			 *	@see _bad_protocol()
			 *	@since PHP4 OOP 0.0.1
			 */
			function _bad_protocol_once($string)
			{
				# Split at the first colon, whether it is written literally, as a
				# decimal or hexadecimal numeric entity, or as the named entity
				# &colon;. Numeric entities may carry any number of leading zeroes
				# (&#058;, &#x003a;) and browsers still decode them to ":", so the
				# zero padding has to be accepted here as well; otherwise
				# "javascript&#0058;..." would slip past the protocol whitelist.
				$string2 = preg_split('/:|&#0*58;|&#x0*3a;|&colon;/i', $string, 2);

				# Only treat the part before the colon as a protocol when it does
				# not contain "/?" (a colon after that belongs to a path or query
				# string, not to a protocol).
				if (isset($string2[1]) && !preg_match('%/\?%', $string2[0]))
				{
					# _bad_protocol_once2() returns "protocol:" when whitelisted and
					# an empty string otherwise; the remainder is re-attached trimmed.
					$string = $this->_bad_protocol_once2($string2[0]).trim($string2[1]);
				}

				return $string;
			}
			/**
			 *	Helper method used by _bad_protocol_once() regex
			 *
			 *	This function processes URL protocols, checks to see if they're in the white-
			 *	list or not, and returns different data depending on the answer.
			 *
			 *	@access private
			 *	@param string $string String to check for protocols
			 *	@return string String with removed protocols
			 *	@see _bad_protocol()
			 *	@see _bad_protocol_once()
			 *	@since PHP4 OOP 0.0.1
			 */
			function _bad_protocol_once2($string)
			{
				# Normalise the candidate protocol before comparing it: decode
				# numeric entities, drop whitespace, run it through _no_null(),
				# remove \xad bytes and lower-case the result.
				$scheme = $this->_decode_entities($string);
				$scheme = preg_replace('/\s/', '', $scheme);
				$scheme = $this->_no_null($scheme);
				$scheme = preg_replace('/\xad+/', '', $scheme); # deals with Opera "feature"
				$scheme = strtolower($scheme);

				# A whitelisted protocol is handed back with its colon restored
				if (is_array($this->allowed_protocols))
				{
					foreach ($this->allowed_protocols as $one_protocol)
					{
						if (strtolower($one_protocol) == $scheme)
						{
							return $scheme.':';
						}
					}
				}

				# Anything else is removed entirely
				return '';
			}

			/**
			 *	This function performs different checks for attribute values.
			 *
			 *	The currently implemented checks are "maxlen", "minlen", "maxval",
			 *	"minval" and "valueless" with even more checks to come soon.
			 *
			 *	@access private
			 *	@param string $value The value of the attribute to be checked.
			 *	@param string $vless Indicates whether the value is supposed to be valueless
			 *	@param string $checkname The check to be performed
			 *	@param string $checkvalue The value that is to be checked against
			 *	@return bool Indicates whether the check passed or not
			 *	@since PHP4 OOP 0.0.1
			 */
			function _check_attr_val($value, $vless, $checkname, $checkvalue)
			{
				# Every check starts out as passed; unknown check names therefore
				# never reject a value.
				$ok = true;

				switch (strtolower($checkname))
				{
					/**
					*	The maxlen check makes sure that the attribute value has a length not
					*	greater than the given value. This can be used to avoid Buffer Overflows
					*	in WWW clients and various Internet servers.
					*/
					case 'maxlen':
						if (strlen($value) > $checkvalue)
						{
							$ok = false;
						}
						break;

					/**
					*	The minlen check makes sure that the attribute value has a length not
					*	smaller than the given value.
					*/
					case 'minlen':
						if (strlen($value) < $checkvalue)
						{
							$ok = false;
						}
						break;

					/**
					*	The maxval check does two things: it checks that the attribute value is
					*	an integer from 0 and up, without an excessive amount of zeroes or
					*	whitespace (to avoid Buffer Overflows). It also checks that the attribute
					*	value is not greater than the given value.
					*	This check can be used to avoid Denial of Service attacks.
					*
					*	The comparison uses the captured digits as an integer rather than the
					*	raw string: before PHP 8 a value with trailing whitespace ("100 ") is
					*	not a numeric string and would be compared lexically against a string
					*	limit, letting "100 " pass a maxval of "50".
					*/
					case 'maxval':
						if (!preg_match('/^\s{0,6}([0-9]{1,6})\s{0,6}$/', $value, $digits)
							|| (int) $digits[1] > $checkvalue)
						{
							$ok = false;
						}
						break;

					/**
					*	The minval check checks that the attribute value is a positive integer,
					*	and that it is not smaller than the given value. As with maxval, the
					*	captured digits are compared as an integer.
					*/
					case 'minval':
						if (!preg_match('/^\s{0,6}([0-9]{1,6})\s{0,6}$/', $value, $digits)
							|| (int) $digits[1] < $checkvalue)
						{
							$ok = false;
						}
						break;

					/**
					*	The valueless check checks if the attribute has a value
					*	(like <a href="blah">) or not (<option selected>). If the given value
					*	is a "y" or a "Y", the attribute must not have a value.
					*	If the given value is an "n" or an "N", the attribute must have one.
					*/
					case 'valueless':
						if (strtolower($checkvalue) != $vless)
						{
							$ok = false;
						}
						break;
				}

				return $ok;
			}

			/**
			 *	Changes \" to "
			 *
			 *	This function changes the character sequence  \"  to just  "
			 *	It leaves all other slashes alone. It's really weird, but the quoting from
			 *	preg_replace(//e) seems to require this.
			 *
			 *	@access private
			 *	@param string $string The string to be stripped.
			 *	@return string string stripped of \"
			 *	@since PHP4 OOP 0.0.1
			 */
			function _stripslashes($string)
			{
				# Matches exactly one backslash immediately followed by a double
				# quote; any other backslash is left where it is.
				$escaped_quote = '%\\\\"%';
				$unescaped     = preg_replace($escaped_quote, '"', $string);

				return $unescaped;
			}

			/**
			 *	helper method for _hair()
			 *
			 *	This function deals with parsing errors in _hair(). The general plan is
			 *	to remove everything to and including some whitespace, but it deals with
			 *	quotes and apostrophes as well.
			 *
			 *	@access private
			 *	@param string $string The string to be stripped.
			 *	@return string string stripped of whitespace
			 *	@see _hair()
			 *	@since PHP4 OOP 0.0.1
			 */
			function _html_error($string)
			{
				# From the start of the string, swallow a run made of "..." chunks,
				# '...' chunks (either may be left unterminated at the end of the
				# string) and single non-whitespace characters, plus the whitespace
				# that follows the run.
				$malformed = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

				return preg_replace($malformed, '', $string);
			}

			/**
			 *	Decodes numeric HTML entities
			 *
			 *	This method decodes numeric HTML entities (&#65; and &#x41;). It doesn't
			 *	do anything with other entities like &auml;, but we don't need them in the
			 *	URL protocol white listing system anyway.
			 *
			 *	@access private
			 *	@param string $string The string whose numeric entities are to be decoded.
			 *	@return string Decoded entity
			 *	@since PHP4 OOP 0.0.1
			 */
			function _decode_entities($string)
			{
				# The /e (eval) modifier of preg_replace() was removed in PHP 7.0;
				# with it the call returns NULL and no protocol could ever be
				# recognised. preg_split() with PREG_SPLIT_DELIM_CAPTURE gives the
				# same single left-to-right pass without evaluating code: the
				# resulting list alternates between literal text (even indexes)
				# and the captured digits of an entity (odd indexes).

				# Decimal entities such as &#65;
				$parts  = preg_split('/&#([0-9]+);/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
				$string = '';
				foreach ($parts as $index => $part)
				{
					# chr() only knows single bytes, so wrap the number into 0..255
					$string .= ($index % 2) ? chr(((int) $part) % 256) : $part;
				}

				# Hexadecimal entities such as &#x41;, decoded after the decimal
				# pass so that an entity produced by that pass is decoded as well
				$parts  = preg_split('/&#[Xx]([0-9A-Fa-f]+);/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
				$string = '';
				foreach ($parts as $index => $part)
				{
					# The last two hex digits are the value modulo 256; using only
					# them keeps arbitrarily long digit runs from overflowing
					$string .= ($index % 2) ? chr(hexdec(substr($part, -2))) : $part;
				}

				return $string;
			}

			/**
			 *	Returns PHP4 OOP version # of kses.
			 *
			 *	Since this class has been refactored and documented and proven to work,
			 *	I'm syncing the version number to procedural kses.
			 *
			 *	@access public
			 *	@return string Version number
			 *	@since PHP4 OOP 0.0.1
			 */
			function _version()
			{
				# Version label, kept in step with procedural kses
				$version = 'PHP4 0.2.2 (OOP fork of procedural kses 0.2.2)';

				return $version;
			}
		}



	}
?>
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -