📄 php4.class.kses.php
字号:
* This method does a lot of work. It parses an attribute list into an array
* with attribute data, and tries to do the right thing even if it gets weird
* input. It will add quotes around attribute values that don't have any quotes
* or apostrophes around them, to make it easier to produce HTML code that will
* conform to W3C's HTML specification. It will also remove bad URL protocols
* from attribute values.
*
* @access private
* @param string $attr Text containing tag attributes for parsing
* @return array Associative array containing data on attribute and value
* @since PHP4 OOP 0.0.1
*/
function _hair($attr)
{
    $parsed = array();
    # Parser state: 0 = expecting an attribute name,
    #               1 = expecting "=" or whitespace (valueless attribute),
    #               2 = expecting an attribute value.
    $state = 0;
    $name = '';

    # Accepted value syntaxes, tried in this order. Each entry pairs the
    # pattern with the quote character used when rebuilding the attribute.
    $value_forms = array(
        array('/^"([^"]*)"(\s+|$)/', '"'),    # "value"
        array("/^'([^']*)'(\s+|$)/", "'"),    # 'value'
        array("%^([^\s\"']+)(\s+|$)%", '"')   # bare value; re-quoted for W3C conformance
    );

    # Consume the attribute list from the left until nothing remains
    while (strlen($attr) != 0)
    {
        # Did this pass manage to consume a well-formed token?
        $progressed = false;

        if ($state == 0)
        {
            # attribute name, href for instance
            if (preg_match('/^([-a-zA-Z]+)/', $attr, $hit))
            {
                $name = $hit[1];
                $attr = (string) substr($attr, strlen($hit[0]));
                $state = 1;
                $progressed = true;
            }
        }
        elseif ($state == 1)
        {
            if (preg_match('/^\s*=\s*/', $attr, $hit))
            {
                # equals sign: a value has to follow
                $attr = (string) substr($attr, strlen($hit[0]));
                $state = 2;
                $progressed = true;
            }
            elseif (preg_match('/^\s+/', $attr, $hit))
            {
                # whitespace only: the attribute is valueless ("selected")
                $parsed[] = array(
                    'name' => $name,
                    'value' => '',
                    'whole' => $name,
                    'vless' => 'y'
                );
                $attr = (string) substr($attr, strlen($hit[0]));
                $state = 0;
                $progressed = true;
            }
        }
        elseif ($state == 2)
        {
            # attribute value, a URL after href= for instance
            foreach ($value_forms as $form)
            {
                if (preg_match($form[0], $attr, $hit))
                {
                    $clean = $this->_bad_protocol($hit[1]);
                    $parsed[] = array(
                        'name' => $name,
                        'value' => $clean,
                        'whole' => $name.'='.$form[1].$clean.$form[1],
                        'vless' => 'n'
                    );
                    $attr = (string) substr($attr, strlen($hit[0]));
                    $state = 0;
                    $progressed = true;
                    break;
                }
            }
        }

        if (!$progressed)
        {
            # not well formed: drop the offending chunk and start over
            $attr = $this->_html_error($attr);
            $state = 0;
        }
    }

    # The list ended right after a name, so it closes with a valueless
    # attribute like "selected"
    if ($state == 1)
    {
        $parsed[] = array(
            'name' => $name,
            'value' => '',
            'whole' => $name,
            'vless' => 'y'
        );
    }

    return $parsed;
}
/**
 * Strips disallowed protocols from the start of a string.
 *
 * The string is first passed through _no_null() and cleared of runs of the
 * \xad byte. _bad_protocol_once() is then applied over and over until the
 * string stops changing, so stacked protocols such as
 * "javascript:javascript:alert(57)" are removed completely.
 *
 * @access private
 * @param string $string String to check for protocols
 * @return string String with removed protocols
 * @see _bad_protocol_once()
 * @since PHP4 OOP 0.0.1
 */
function _bad_protocol($string)
{
    $string = $this->_no_null($string);
    $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"

    # Keep filtering until a pass leaves the string untouched
    do
    {
        $before = $string;
        $string = $this->_bad_protocol_once($before);
    }
    while ($string != $before);

    return $string;
}
/**
 * Helper method used by _bad_protocol()
 *
 * This function searches for a URL protocol at the beginning of $string. The
 * string is split at the first colon, where the colon may be written
 * literally or as a numeric HTML entity (&#58; or &#x3a;, with or without
 * leading zeroes, in any letter case). The part before the colon is handed
 * to _bad_protocol_once2(), which decides whether it is kept.
 * Function updated to fix security vulnerability (see http://projects.dokeos.com/index.php?do=details&task_id=2312)
 *
 * @access private
 * @param string $string String to check for protocols
 * @return string String with removed protocols
 * @see _bad_protocol()
 * @see _bad_protocol_once2()
 * @since PHP4 OOP 0.0.1
 */
function _bad_protocol_once($string)
{
    # The entity alternatives matter: without them "javascript&#58;alert(1)"
    # would never be split and its protocol would go unchecked.
    $string2 = preg_split('/:|&#0*58;|&#x0*3a;/i', $string, 2);

    # Only treat the prefix as a protocol when a colon was found and the
    # prefix does not contain "/?" (in that case the colon is left alone).
    if (isset($string2[1]) && !preg_match('%/\?%', $string2[0]))
    {
        $string = $this->_bad_protocol_once2($string2[0]).trim($string2[1]);
    }

    return $string;
}
/**
 * Helper method used by _bad_protocol_once()
 *
 * Normalises a protocol candidate (numeric entities decoded, whitespace and
 * \xad bytes removed, passed through _no_null(), lower-cased) and looks it up
 * in $this->allowed_protocols, ignoring letter case.
 *
 * @access private
 * @param string $string Protocol candidate, without the trailing colon
 * @return string The normalised protocol followed by ":" when it is
 *                white-listed, an empty string otherwise
 * @see _bad_protocol()
 * @see _bad_protocol_once()
 * @since PHP4 OOP 0.0.1
 */
function _bad_protocol_once2($string)
{
    $candidate = $this->_decode_entities($string);
    $candidate = preg_replace('/\s/', '', $candidate);
    $candidate = $this->_no_null($candidate);
    $candidate = preg_replace('/\xad+/', '', $candidate); # deals with Opera "feature"
    $candidate = strtolower($candidate);

    # No usable white-list means nothing is allowed
    if (!is_array($this->allowed_protocols) || count($this->allowed_protocols) < 1)
    {
        return '';
    }

    foreach ($this->allowed_protocols as $protocol)
    {
        if (strtolower($protocol) == $candidate)
        {
            return "$candidate:";
        }
    }

    return '';
}
/**
 * This function performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval",
 * "minval" and "valueless". The check name is matched case-insensitively;
 * an unknown check name always passes.
 *
 * @access private
 * @param string $value The value of the attribute to be checked.
 * @param string $vless Indicates whether the value is supposed to be valueless
 * @param string $checkname The check to be performed
 * @param string $checkvalue The value that is to be checked against
 * @return bool Indicates whether the check passed or not
 * @since PHP4 OOP 0.0.1
 */
function _check_attr_val($value, $vless, $checkname, $checkvalue)
{
    $ok = true;

    switch (strtolower($checkname))
    {
        /**
         * The maxlen check makes sure that the attribute value has a length not
         * greater than the given value. This can be used to avoid Buffer Overflows
         * in WWW clients and various Internet servers.
         */
        case 'maxlen':
            if (strlen($value) > $checkvalue)
            {
                $ok = false;
            }
            break;

        /**
         * The minlen check makes sure that the attribute value has a length not
         * smaller than the given value.
         */
        case 'minlen':
            if (strlen($value) < $checkvalue)
            {
                $ok = false;
            }
            break;

        /**
         * The maxval check does two things: it checks that the attribute value is
         * an integer from 0 and up, without an excessive amount of zeroes or
         * whitespace (to avoid Buffer Overflows). It also checks that the attribute
         * value is not greater than the given value.
         * This check can be used to avoid Denial of Service attacks.
         *
         * Only the captured digits are compared: a value with surrounding
         * whitespace is not a numeric string on every PHP version, and
         * comparing it raw against a string limit would then be lexicographic.
         */
        case 'maxval':
            if (!preg_match('/^\s{0,6}([0-9]{1,6})\s{0,6}$/', $value, $digits))
            {
                $ok = false;
            }
            elseif ($digits[1] > $checkvalue)
            {
                $ok = false;
            }
            break;

        /**
         * The minval check checks that the attribute value is a positive integer,
         * and that it is not smaller than the given value. As for maxval, only
         * the captured digits take part in the comparison.
         */
        case 'minval':
            if (!preg_match('/^\s{0,6}([0-9]{1,6})\s{0,6}$/', $value, $digits))
            {
                $ok = false;
            }
            elseif ($digits[1] < $checkvalue)
            {
                $ok = false;
            }
            break;

        /**
         * The valueless check checks if the attribute has a value
         * (like <a href="blah">) or not (<option selected>). If the given value
         * is a "y" or a "Y", the attribute must not have a value.
         * If the given value is an "n" or an "N", the attribute must have one.
         */
        case 'valueless':
            if (strtolower($checkvalue) != $vless)
            {
                $ok = false;
            }
            break;
    }

    return $ok;
}
/**
 * Changes \" to "
 *
 * Every occurrence of a backslash immediately followed by a double quote is
 * reduced to the double quote alone. All other backslashes are left as they
 * are. It's odd, but the quoting done by preg_replace(//e) seems to require
 * this.
 *
 * @access private
 * @param string $string The string to be stripped.
 * @return string string stripped of \"
 * @since PHP4 OOP 0.0.1
 */
function _stripslashes($string)
{
    # Pattern: one literal backslash followed by a double quote
    $escaped_quote = '%\\\\"%';
    $plain_quote = '"';

    return preg_replace($escaped_quote, $plain_quote, $string);
}
/**
 * helper method for _hair()
 *
 * Recovers from a parsing error in _hair() by cutting the malformed chunk
 * off the front of the string: everything up to and including the next run
 * of whitespace is removed. Quoted and apostrophised sections (terminated
 * or not) are skipped as a whole, so whitespace inside them does not end
 * the chunk.
 *
 * @access private
 * @param string $string The string to be stripped.
 * @return string The string without its leading malformed chunk
 * @see _hair()
 * @since PHP4 OOP 0.0.1
 */
function _html_error($string)
{
    # Leading run of: "quoted" | 'quoted' | single non-space char, then whitespace
    $bad_chunk = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

    $remainder = preg_replace($bad_chunk, '', $string);

    return $remainder;
}
/**
 * Decodes numeric HTML entities
 *
 * This method decodes numeric HTML entities, both decimal (&#65;) and
 * hexadecimal (&#x41;). It doesn't do anything with other entities like
 * &auml;, but we don't need them in the URL protocol white listing system
 * anyway.
 *
 * The decimal pass runs first and the hexadecimal pass then runs over its
 * result. Each entity is turned into a single byte with chr(), so only the
 * low 8 bits of the code are used.
 *
 * @access private
 * @param string $string The string containing the entities to be decoded.
 * @return string String with numeric entities decoded
 * @since PHP4 OOP 0.0.1
 */
function _decode_entities($string)
{
    # The /e modifier of preg_replace() no longer exists in current PHP, so
    # the string is split on the entities instead. With
    # PREG_SPLIT_DELIM_CAPTURE the result alternates between plain text
    # (even indexes) and the captured entity code (odd indexes).

    # Decimal entities
    $pieces = preg_split('/&#([0-9]+);/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
    $string = '';
    foreach ($pieces as $index => $piece)
    {
        if ($index % 2 == 1)
        {
            $string .= chr(((int) $piece) & 0xFF);
        }
        else
        {
            $string .= $piece;
        }
    }

    # Hexadecimal entities
    $pieces = preg_split('/&#[Xx]([0-9A-Fa-f]+);/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
    $string = '';
    foreach ($pieces as $index => $piece)
    {
        if ($index % 2 == 1)
        {
            # The last two hex digits are the low byte; taking only them
            # keeps overlong codes from overflowing hexdec()
            $string .= chr(hexdec(substr($piece, -2)));
        }
        else
        {
            $string .= $piece;
        }
    }

    return $string;
}
/**
 * Reports the version of this PHP4 OOP port of kses.
 *
 * The class has been refactored, documented and proven to work, so its
 * version number is kept in sync with procedural kses.
 *
 * @access public
 * @return string Version number
 * @since PHP4 OOP 0.0.1
 */
function _version()
{
    $version = 'PHP4 0.2.2 (OOP fork of procedural kses 0.2.2)';

    return $version;
}
}
}
?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -