htmlencoder.java

来自「anewssystem新闻发布系统集成使用了spring hibernate f」· Java 代码 · 共 339 行

JAVA
339
字号
package anni.core.web.taglib;

import java.io.UnsupportedEncodingException;

import java.net.URLDecoder;
import java.net.URLEncoder;


/**
 * Encodes text strings and URLs to be HTML-safe.
 * 来自http://jodd.sourceforge.net
 *
 * @author najgor@users.sourceforge.net
 * @author Lingo
 * @since 2007-03-17
 * @version 1.0
 */
public final class HtmlEncoder {
    /**
     * StringBuffer使用的,在原字符串上扩展的因数.
     */
    public static final float NEW_SIZE_FACTOR = 1.3f;

    /**
     * Lookup table for use in encode() method.
     *
     * @see #encode
     */
    private static final String[] TABLE_HTML = new String[256];

    /**
     * Lookup table for use in encodeTextXxx() methods.
     *
     * @see #encodeText
     * @see #encodeTextSmart
     * @see #encodeTextStrict
     */
    private static final String[] TABLE_HTML_STRICT = new String[256];

    static {
        for (int i = 0; i < 10; i++) {
            TABLE_HTML[i] = "&#00" + i + ";";
        }

        for (int i = 10; i < 32; i++) {
            TABLE_HTML[i] = "&#0" + i + ";";
        }

        for (int i = 32; i < 128; i++) {
            TABLE_HTML[i] = String.valueOf((char) i);
        }

        for (int i = 128; i < 256; i++) {
            TABLE_HTML[i] = "&#" + i + ";";
        }

        // special characters
        TABLE_HTML['\''] = "&#039;"; // apostrophe ('&apos;' doesn't work - it is not by the w3 specs).
        TABLE_HTML['\"'] = "&quot;"; // double quote.
        TABLE_HTML['&'] = "&amp;"; // ampersand.
        TABLE_HTML['<'] = "&lt;"; // lower than.
        TABLE_HTML['>'] = "&gt;"; // greater than.

        // strict table
        System.arraycopy(TABLE_HTML, 0, TABLE_HTML_STRICT, 0, 256);
        TABLE_HTML_STRICT[' '] = "&nbsp;"; // 空格.
        TABLE_HTML_STRICT['\n'] = "<br>"; // ascii 10.
        TABLE_HTML_STRICT['\r'] = "<br>"; // ascii 13.
    }

    /**
     * 工具类的私有构造方法.
     */
    private HtmlEncoder() {
    }

    // ---------------------------------------------------------------- encoding

    /**
     * Encode string to HTML-safe text. Extra characters are encoded as decimals,
     * and five special characters are replaced with their HTML values:
     * <li>' with &amp;#039;</li>
     * <li>" with &amp;quot;</li>
     * <li>&amp; with &amp;amp;</li>
     * <li>&lt; with &amp;lt;</li>
     * <li>&gt; with &amp;gt;</li>
     *
     * @param string input string
     *
     * @return HTML-safe string
     * @see #encodeText
     */
    public static String encode(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }

        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        int tableLen = TABLE_HTML.length;
        char c;

        for (int i = 0; i < n; i++) {
            c = string.charAt(i);

            if (c < tableLen) {
                buffer.append(TABLE_HTML[c]);
            } else {
                buffer.append("&#").append((int) c).append(';');
            }
        }

        return buffer.toString();
    }

    /**
     * Encodes text int HTML-safe text and preserves format. Additionaly, the following
     * characters are replaced:
     * <li>' ' with &amp;nbsp;</li>
     * <li>\n with &lt;br&gt;</li>
     * <li>\r with &lt;br&gt;</li>
     * <br><br>
     * Additionaly, this method takes care about CRLF and LF texts and handles
     * both.
     *
     * Common problem with this method is that spaces are not breakable, so they
     * may break the outline of the page.
     *
     * @param string input string
     *
     * @return HTML-safe format
     */
    public static String encodeTextStrict(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }

        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        int tableLen = TABLE_HTML_STRICT.length;
        char c = 0;
        char prev = 0;

        for (int i = 0; i < n; i++, prev = c) {
            c = string.charAt(i);

            if ((c == '\n') && (prev == '\r')) {
                continue; // previously '\r' (CR) was encoded, so skip '\n' (LF)
            }

            if (c < tableLen) {
                buffer.append(TABLE_HTML_STRICT[c]);
            } else {
                buffer.append("&#").append((int) c).append(';');
            }
        }

        return buffer.toString();
    }

    /**
     * Encodes text int HTML-safe text and preserves format except spaces.
     * Additionaly, the following characters are replaced:
     *
     * <li>\n with &lt;br&gt;</li>
     * <li>\r with &lt;br&gt;</li>
     * <br><br>
     * Additionaly, this method takes care about CRLF and LF texts and handles
     * both.
     *
     * @param string input string
     *
     * @return HTML-safe format
     */
    public static String encodeText(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }

        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        int tableLen = TABLE_HTML_STRICT.length;
        char c = 0;
        char prev = 0;

        for (int i = 0; i < n; i++, prev = c) {
            c = string.charAt(i);

            if (c == ' ') {
                buffer.append(' ');

                continue;
            }

            if ((c == '\n') && (prev == '\r')) {
                continue; // previously '\r' (CR) was encoded, so skip '\n' (LF)
            }

            if (c < tableLen) {
                buffer.append(TABLE_HTML_STRICT[c]);
            } else {
                buffer.append("&#").append((int) c).append(';');
            }
        }

        return buffer.toString();
    }

    /**
     * Encodes text int HTML-safe text and preserves format using smart spaces.
     * Additionaly, the following characters are replaced:
     *
     * <li>\n with &lt;br&gt;</li>
     * <li>\r with &lt;br&gt;</li>
     * <br><br>
     * Additionaly, this method takes care about CRLF and LF texts and handles
     * both.<br>
     *
     * This method is special since it preserves format, but with combination of
     * not-breakable spaces and common spaces, so breaks are availiable.
     *
     * @param string input string
     *
     * @return HTML-safe format
     */
    public static String encodeTextSmart(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }

        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        int tableLen = TABLE_HTML_STRICT.length;
        char c = 0;
        char prev = 0;
        boolean prevSpace = false;

        for (int i = 0; i < n; i++, prev = c) {
            c = string.charAt(i);

            if (c == ' ') {
                if (prev != ' ') {
                    prevSpace = false;
                }

                if (!prevSpace) {
                    buffer.append(' ');
                } else {
                    buffer.append("&nbsp;");
                }

                prevSpace = !prevSpace;

                continue;
            }

            if ((c == '\n') && (prev == '\r')) {
                continue; // previously '\r' (CR) was encoded, so skip '\n' (LF)
            }

            if (c < tableLen) {
                buffer.append(TABLE_HTML_STRICT[c]);
            } else {
                buffer.append("&#").append((int) c).append(';');
            }
        }

        return buffer.toString();
    }

    // ---------------------------------------------------------------- URL encode/decode

    /**
     * Encodes HTML JavaScript for page output using ISO-88591-1 encoding. Null
     * strings are converted to empty ones. Unfortunatelly, this encoding is not
     * comatible with the javascripts functions escape/unescape.
     *
     * @param string input string
     *
     * @return HTML ready string.
     */
    public static String encodeUrl(String string) {
        return encodeUrl(string, "ISO-8859-1");
    }

    /**
     * Encodes HTML JavaScript for page output. Null strings are converted to
     * empty ones.
     *
     * @param string   input string
     * @param encoding 编码
     *
     * @return HTML ready string.
     */
    public static String encodeUrl(String string, String encoding) {
        if (string == null) {
            return "";
        }

        try {
            return URLEncoder.encode(string, encoding);
        } catch (UnsupportedEncodingException e) {
            return null;
        }
    }

    /**
     * Encodes HTML JavaScript for page output using ISO-88591-1 encoding. Null
     * strings are converted to empty ones.
     *
     * @param string input
     *
     * @return HTML ready string.
     */
    public static String decodeUrl(String string) {
        return decodeUrl(string, "ISO-8859-1");
    }

    /**
     * Encodes HTML JavaScript for page output. Null strings are converted to empty ones.
     *
     * @param string   input
     * @param encoding encoding
     *
     * @return HTML ready string.
     */
    public static String decodeUrl(String string, String encoding) {
        if (string == null) {
            return "";
        }

        try {
            return URLDecoder.decode(string, encoding);
        } catch (UnsupportedEncodingException e) {
            return null;
        }
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?