safe.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 670 行 · 第 1/2 页

PHP
670
字号
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * HTML_Safe Parser
 *
 * PHP versions 4 and 5
 *
 * @category   HTML
 * @package    HTML_Safe
 * @author     Roman Ivanov <thingol@mail.ru>
 * @copyright  2004-2005 Roman Ivanov
 * @license    http://www.debian.org/misc/bsd.license  BSD License (3 Clause)
 * @version    CVS: $Id:$
 * @link       http://pear.php.net/package/HTML_Safe
 */


/**
 * This package requires HTMLSax3 package
 */
require_once dirname(dirname(__FILE__)).'/XML/HTMLSax3.php';

 
/**
 *
 * HTML_Safe Parser
 *
 * This parser strips down all potentially dangerous content within HTML:
 * <ul>
 * <li>opening tag without its closing tag</li>
 * <li>closing tag without its opening tag</li>
 * <li>any of these tags: "base", "basefont", "head", "html", "body", "applet", 
 * "object", "iframe", "frame", "frameset", "script", "layer", "ilayer", "embed", 
 * "bgsound", "link", "meta", "style", "title", "blink", "xml" etc.</li>
 * <li>any of these attributes: on*, data*, dynsrc, id, name</li>
 * <li>javascript:/vbscript:/about: etc. protocols</li>
 * <li>expression/behavior etc. in styles</li>
 * <li>any other active content</li>
 * </ul>
 * It also tries to convert the markup to valid XHTML, but htmltidy is a far
 * better solution for that task.
 *
 * <b>Example:</b>
 * <pre>
 * $parser = new HTML_Safe();
 * $result = $parser->parse($doc);
 * </pre>
 *
 * @category   HTML
 * @package    HTML_Safe
 * @author     Roman Ivanov <thingol@mail.ru>
 * @copyright  1997-2005 Roman Ivanov
 * @license    http://www.debian.org/misc/bsd.license  BSD License (3 Clause)
 * @version    Release: @package_version@
 * @link       http://pear.php.net/package/HTML_Safe
 */
class HTML_Safe 
{
    /**
     * Storage for resulting HTML output
     *
     * @var string
     * @access private
     */
    var $_xhtml = '';
    
    /**
     * Array of counters for each tag
     *
     * @var array
     * @access private
     */
    var $_counter = array();
    
    /**
     * Stack of unclosed tags
     *
     * @var array
     * @access private
     */
    var $_stack = array();
    
    /**
     * Array of counters for tags that must be deleted with all content
     *
     * @var array
     * @access private
     */
    var $_dcCounter = array();
    
    /**
     * Stack of unclosed tags that must be deleted with all content
     *
     * @var array
     * @access private
     */
    var $_dcStack = array();
    
    /**
     * Stores level of list (ol/ul) nesting
     *
     * @var int
     * @access private
     */
    var $_listScope = 0; 
    
    /**
     * Stack of unclosed list tags 
     *
     * @var array
     * @access private
     */
    var $_liStack = array();

    /**
     * Array of prepared regular expressions for protocols (schemas) matching
     *
     * @var array
     * @access private
     */
    var $_protoRegexps = array();
    
    /**
     * Array of prepared regular expressions for CSS matching
     *
     * @var array
     * @access private
     */
    var $_cssRegexps = array();

    /**
     * List of single tags ("<tag />")
     *
     * @var array
     * @access public
     */
    var $singleTags = array('area', 'br', 'img', 'input', 'hr', 'wbr', );

    /**
     * List of dangerous tags (such tags will be deleted)
     *
     * @var array
     * @access public
     */
    var $deleteTags = array(
        'applet', 'base',   'basefont', 'bgsound', 'blink',  'body', 
        'embed',  'frame',  'frameset', 'head',    'html',   'ilayer', 
        'iframe', 'layer',  'link',     'meta',    'object', 'style', 
        'title',  'script', 
        );

    /**
     * List of dangerous tags (such tags will be deleted, and all content
     * inside these tags will also be removed)
     *
     * @var array
     * @access public
     */
    var $deleteTagsContent = array('script', 'style', 'title', 'xml', );

    /**
     * Type of protocols filtering ('white' or 'black')
     *
     * @var string
     * @access public
     */
    var $protocolFiltering = 'white';

    /**
     * List of "dangerous" protocols (used for blacklist-filtering)
     *
     * @var array
     * @access public
     */
    var $blackProtocols = array(
        'about',   'chrome',     'data',       'disk',     'hcp',     
        'help',    'javascript', 'livescript', 'lynxcgi',  'lynxexec', 
        'ms-help', 'ms-its',     'mhtml',      'mocha',    'opera',   
        'res',     'resource',   'shell',      'vbscript', 'view-source', 
        'vnd.ms.radio',          'wysiwyg', 
        );

    /**
     * List of "safe" protocols (used for whitelist-filtering)
     *
     * @var array
     * @access public
     */
    var $whiteProtocols = array(
        'ed2k',   'file', 'ftp',  'gopher', 'http',  'https', 
        'irc',    'mailto', 'news', 'nntp', 'telnet', 'webcal', 
        'xmpp',   'callto',
        );

    /**
     * List of attributes that can contain protocols
     *
     * @var array
     * @access public
     */
    var $protocolAttributes = array(
        'action', 'background', 'codebase', 'dynsrc', 'href', 'lowsrc', 'src', 
        );

    /**
     * List of dangerous CSS keywords
     *
     * The whole style="" attribute is removed if the parser finds one of
     * these keywords
     *
     * @var array
     * @access public
     */
    var $cssKeywords = array(
        'absolute', 'behavior',       'behaviour',   'content', 'expression', 
        'fixed',    'include-source', 'moz-binding',
        );

    /**
     * List of tags that can have no "closing tag"
     *
     * @var array
     * @access public
     * @deprecated XHTML does not allow such tags
     */
    var $noClose = array();

    /**
     * List of block-level tags that terminate a paragraph
     *
     * An open paragraph is closed when one of these tags is opened
     *
     * @var array
     * @access public
     */
    var $closeParagraph = array(
        'address', 'blockquote', 'center', 'dd',      'dir',       'div', 
        'dl',      'dt',         'h1',     'h2',      'h3',        'h4', 
        'h5',      'h6',         'hr',     'isindex', 'listing',   'marquee', 
        'menu',    'multicol',   'ol',     'p',       'plaintext', 'pre', 
        'table',   'ul',         'xmp', 
        );

    /**
     * List of table tags, all table tags outside a table will be removed
     *
     * @var array
     * @access public
     */
    var $tableTags = array(
        'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 
        'thead',   'tr', 
        );

    /**
     * List of list tags
     *
     * @var array
     * @access public
     */
    var $listTags = array('dir', 'menu', 'ol', 'ul', 'dl', );

    /**
     * List of dangerous attributes
     *
     * @var array
     * @access public
     */
    var $attributes = array('dynsrc', 'id', 'name', );

    /**
     * List of allowed "namespaced" attributes
     *
     * @var array
     * @access public
     */
    var $attributesNS = array('xml:lang', );

    /**
     * Constructs class
     *
     * @access public
     */
    function __construct()
    {
        // PHP 8 no longer treats a method named after its class as the
        // constructor, so the setup lives here. HTML_Safe() below remains as
        // a thin wrapper for PHP 4 and for code that calls it explicitly.

        // Start from empty lists so that running the setup a second time
        // (for example through the legacy wrapper) does not duplicate patterns.
        $this->_protoRegexps = array();
        $this->_cssRegexps   = array();

        // Characters tolerated before, between and after the letters of a
        // protocol name: whitespace and control characters 0x01-0x1F.
        $gap = "[\s\x01-\x1F]*";

        // One case-insensitive pattern per blacklisted protocol; the pattern
        // ends with the ":" that terminates a protocol name.
        foreach ($this->blackProtocols as $proto) {
            $pattern = '/' . $gap;
            $length  = strlen($proto);
            for ($i = 0; $i < $length; $i++) {
                // Square-bracket offsets replace the {} form removed in PHP 8.
                // preg_quote() keeps characters such as "." literal instead
                // of letting them act as regex metacharacters.
                $pattern .= preg_quote($proto[$i], '/') . $gap;
            }
            $this->_protoRegexps[] = $pattern . ':/i';
        }

        // One case-insensitive pattern per dangerous CSS keyword.
        foreach ($this->cssKeywords as $css) {
            $this->_cssRegexps[] = '/' . preg_quote($css, '/') . '/i';
        }
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility
     *
     * Delegates to __construct() so that the regular expressions are built
     * the same way regardless of which entry point is used.
     *
     * @return boolean always true
     * @access public
     */
    function HTML_Safe()
    {
        $this->__construct();
        return true;
    }

    /**
     * Handles the writing of attributes - called from $this->_openHandler()
     *
     * @param array $attrs array of attributes $name => $value
     * @return boolean
     * @access private
     */
    function _writeAttrs ($attrs) 
    {
        if (is_array($attrs)) {
            foreach ($attrs as $name => $value) {

                $name = strtolower($name);

                if (strpos($name, 'on') === 0) {
                    continue;
                }
                if (strpos($name, 'data') === 0) {
                    continue;
                }
                if (in_array($name, $this->attributes)) {
                    continue;
                }
                if (!preg_match("/^[a-z0-9]+$/i", $name)) {
                    if (!in_array($name, $this->attributesNS)) {
                        continue;
                    }
                }

                if (($value === TRUE) || (is_null($value))) {
                    $value = $name;
                }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?