safe.php
来自「php 开发的内容管理系统」· PHP 代码 · 共 670 行 · 第 1/2 页
PHP
670 行
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
/**
* HTML_Safe Parser
*
* PHP versions 4 and 5
*
* @category HTML
* @package HTML_Safe
* @author Roman Ivanov <thingol@mail.ru>
* @copyright 2004-2005 Roman Ivanov
* @license http://www.debian.org/misc/bsd.license BSD License (3 Clause)
* @version CVS: $Id:$
* @link http://pear.php.net/package/HTML_Safe
*/
/**
* This package requires HTMLSax3 package
*/
require_once dirname(dirname(__FILE__)).'/XML/HTMLSax3.php';
/**
*
* HTML_Safe Parser
*
* This parser strips down all potentially dangerous content within HTML:
* <ul>
* <li>opening tag without its closing tag</li>
* <li>closing tag without its opening tag</li>
* <li>any of these tags: "base", "basefont", "head", "html", "body", "applet",
* "object", "iframe", "frame", "frameset", "script", "layer", "ilayer", "embed",
* "bgsound", "link", "meta", "style", "title", "blink", "xml" etc.</li>
* <li>any of these attributes: on*, data*, dynsrc</li>
* <li>javascript:/vbscript:/about: etc. protocols</li>
* <li>expression/behavior etc. in styles</li>
* <li>any other active content</li>
* </ul>
* It also tries to convert the code to valid XHTML, but htmltidy is a far
* better solution for this task.
*
* <b>Example:</b>
* <pre>
* $parser = new HTML_Safe();
* $result = $parser->parse($doc);
* </pre>
*
* @category HTML
* @package HTML_Safe
* @author Roman Ivanov <thingol@mail.ru>
* @copyright 1997-2005 Roman Ivanov
* @license http://www.debian.org/misc/bsd.license BSD License (3 Clause)
* @version Release: @package_version@
* @link http://pear.php.net/package/HTML_Safe
*/
class HTML_Safe
{
/**
* Storage for resulting HTML output
*
* @var string
* @access private
*/
var $_xhtml = '';
/**
* Array of counters for each tag
*
* @var array
* @access private
*/
var $_counter = array();
/**
* Stack of unclosed tags
*
* @var array
* @access private
*/
var $_stack = array();
/**
* Array of counters for tags that must be deleted with all content
*
* @var array
* @access private
*/
var $_dcCounter = array();
/**
* Stack of unclosed tags that must be deleted with all content
*
* @var array
* @access private
*/
var $_dcStack = array();
/**
* Stores level of list (ol/ul) nesting
*
* @var int
* @access private
*/
var $_listScope = 0;
/**
* Stack of unclosed list tags
*
* @var array
* @access private
*/
var $_liStack = array();
/**
* Array of prepared regular expressions for protocols (schemas) matching
*
* @var array
* @access private
*/
var $_protoRegexps = array();
/**
* Array of prepared regular expressions for CSS matching
*
* @var array
* @access private
*/
var $_cssRegexps = array();
/**
* List of single tags ("<tag />")
*
* @var array
* @access public
*/
var $singleTags = array('area', 'br', 'img', 'input', 'hr', 'wbr', );
/**
* List of dangerous tags (such tags will be deleted)
*
* @var array
* @access public
*/
var $deleteTags = array(
'applet', 'base', 'basefont', 'bgsound', 'blink', 'body',
'embed', 'frame', 'frameset', 'head', 'html', 'ilayer',
'iframe', 'layer', 'link', 'meta', 'object', 'style',
'title', 'script',
);
/**
* List of dangerous tags (such tags will be deleted, and all content
* inside these tags will also be removed)
*
* @var array
* @access public
*/
var $deleteTagsContent = array('script', 'style', 'title', 'xml', );
/**
* Type of protocols filtering ('white' or 'black')
*
* @var string
* @access public
*/
var $protocolFiltering = 'white';
/**
* List of "dangerous" protocols (used for blacklist-filtering)
*
* @var array
* @access public
*/
var $blackProtocols = array(
'about', 'chrome', 'data', 'disk', 'hcp',
'help', 'javascript', 'livescript', 'lynxcgi', 'lynxexec',
'ms-help', 'ms-its', 'mhtml', 'mocha', 'opera',
'res', 'resource', 'shell', 'vbscript', 'view-source',
'vnd.ms.radio', 'wysiwyg',
);
/**
* List of "safe" protocols (used for whitelist-filtering)
*
* @var array
* @access public
*/
var $whiteProtocols = array(
'ed2k', 'file', 'ftp', 'gopher', 'http', 'https',
'irc', 'mailto', 'news', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto',
);
/**
* List of attributes that can contain protocols
*
* @var array
* @access public
*/
var $protocolAttributes = array(
'action', 'background', 'codebase', 'dynsrc', 'href', 'lowsrc', 'src',
);
/**
* List of dangerous CSS keywords
*
* The whole style="" attribute will be removed if the parser finds one of
* these keywords
*
* @var array
* @access public
*/
var $cssKeywords = array(
'absolute', 'behavior', 'behaviour', 'content', 'expression',
'fixed', 'include-source', 'moz-binding',
);
/**
* List of tags that can have no "closing tag"
*
* @var array
* @access public
* @deprecated XHTML does not allow such tags
*/
var $noClose = array();
/**
* List of block-level tags that terminate a paragraph
*
* An open paragraph will be closed when one of these tags is opened
*
* @var array
* @access public
*/
var $closeParagraph = array(
'address', 'blockquote', 'center', 'dd', 'dir', 'div',
'dl', 'dt', 'h1', 'h2', 'h3', 'h4',
'h5', 'h6', 'hr', 'isindex', 'listing', 'marquee',
'menu', 'multicol', 'ol', 'p', 'plaintext', 'pre',
'table', 'ul', 'xmp',
);
/**
* List of table tags, all table tags outside a table will be removed
*
* @var array
* @access public
*/
var $tableTags = array(
'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
'thead', 'tr',
);
/**
* List of list tags
*
* @var array
* @access public
*/
var $listTags = array('dir', 'menu', 'ol', 'ul', 'dl', );
/**
* List of dangerous attributes
*
* @var array
* @access public
*/
var $attributes = array('dynsrc', 'id', 'name', );
/**
* List of allowed "namespaced" attributes
*
* @var array
* @access public
*/
var $attributesNS = array('xml:lang', );
/**
* Constructs class
*
* @access public
*/
function __construct()
{
    // PHP 8 no longer treats a method named after its class as the
    // constructor, so the real initialisation lives here. It is declared
    // ahead of HTML_Safe() so that older PHP 5 releases do not report a
    // redefined constructor.

    // Start from empty lists so that a second invocation (for example a
    // subclass calling the legacy HTML_Safe() constructor explicitly) does
    // not append duplicate patterns.
    $this->_protoRegexps = array();
    $this->_cssRegexps   = array();

    // Each protocol pattern tolerates any run of whitespace or control
    // characters (0x01-0x1F) before the name and after every character of
    // it, so "java script:" style obfuscation is still matched.
    $gap = "[\s\x01-\x1F]*";

    foreach ($this->blackProtocols as $proto) {
        $preg   = '/' . $gap;
        $length = strlen($proto);
        for ($i = 0; $i < $length; $i++) {
            // Square brackets replace the {} string-offset syntax that was
            // removed in PHP 8; preg_quote() keeps characters such as the
            // "." in "vnd.ms.radio" literal instead of acting as wildcards.
            $preg .= preg_quote($proto[$i], '/') . $gap;
        }
        $preg .= ':/i';
        $this->_protoRegexps[] = $preg;
    }

    // CSS keywords are matched case-insensitively anywhere in the value;
    // they are quoted so a keyword is always treated as plain text.
    foreach ($this->cssKeywords as $css) {
        $this->_cssRegexps[] = '/' . preg_quote($css, '/') . '/i';
    }
}

/**
 * PHP 4 style constructor, kept for backward compatibility
 *
 * Delegates to __construct() so that PHP 4 (where this method is the
 * constructor) and existing code that calls HTML_Safe() explicitly
 * still get the regular expressions prepared.
 *
 * @return boolean always true
 * @access public
 */
function HTML_Safe()
{
    $this->__construct();
    return true;
}
/**
* Handles the writing of attributes - called from $this->_openHandler()
*
* @param array $attrs array of attributes $name => $value
* @return boolean
* @access private
*/
function _writeAttrs ($attrs)
{
if (is_array($attrs)) {
foreach ($attrs as $name => $value) {
$name = strtolower($name);
if (strpos($name, 'on') === 0) {
continue;
}
if (strpos($name, 'data') === 0) {
continue;
}
if (in_array($name, $this->attributes)) {
continue;
}
if (!preg_match("/^[a-z0-9]+$/i", $name)) {
if (!in_array($name, $this->attributesNS)) {
continue;
}
}
if (($value === TRUE) || (is_null($value))) {
$value = $name;
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?