📄 cmsencoder.java

📁 cms是开源的框架
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
     * like <code>&amp;#8364;</code>.<p>
     * 
     * This is required since a Java String is 
     * internally always stored as Unicode, meaning it can contain almost every character, but 
     * the HTML charset used might not support all such characters.<p>
     * 
     * @param input the input to encode for HTML
     * @param encoding the charset to encode the result with
     * @return the input with the encoded HTML entities
     * @see #decodeHtmlEntities(String, String)
     */
    public static String encodeHtmlEntities(String input, String encoding) {

        // same contract as escapeHtml / escapeNonAscii / escapeXml: null in, null out
        // (without this guard a null input caused a NullPointerException)
        if (input == null) {
            return null;
        }
        // throws an IllegalArgumentException (or a subclass) for an illegal or unsupported charset name
        CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
        int length = input.length();
        StringBuffer result = new StringBuffer(length * 2);
        // NOTE(review): the input is processed one UTF-16 char at a time, so a character outside 
        // the basic multilingual plane is seen as two separate surrogate chars - confirm this is 
        // what the decoding counterpart expects before changing it
        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            // the first 128 chars are contained in almost every charset, checking them first 
            // is intended as performance improvement since canEncode() appears quite CPU heavy
            if ((c < 128) || encoder.canEncode(c)) {
                // the target charset can represent this char, keep it unchanged
                result.append(c);
            } else {
                // append the HTML entity reference: prefix, decimal char code, terminating semicolon
                result.append(ENTITY_PREFIX);
                result.append((int)c);
                result.append(";");
            }
        }
        return result.toString();
    }

    /**
     * Encodes a String that is used as a parameter in an URI, in a way that is independent 
     * of other encodings / decodings applied later.<p>
     * 
     * This is used to ensure that GET parameters are not wrecked by wrong or incompatible 
     * configuration settings. The encoding is done in three steps:
     * <ol>
     * <li>every character that cannot be encoded in US-ASCII is replaced by a HTML entity
     * <li>the plus sign is replaced by <code>PLUS_ENTITY</code>, to avoid problems with the white-space replacer
     * <li>the entity prefix is replaced by <code>ENTITY_REPLACEMENT</code>, which uses 
     *     characters that are not delimiters in URLs
     * </ol>
     * 
     * @param input the parameter string
     * @return the encoded parameter string
     */
    public static String encodeParameter(String input) {

        // step 1: only US-ASCII characters remain, everything else becomes an entity
        String asciiOnly = CmsEncoder.encodeHtmlEntities(input, CmsEncoder.ENCODING_US_ASCII);
        // step 2: protect the plus sign
        String plusProtected = CmsStringUtil.substitute(asciiOnly, "+", PLUS_ENTITY);
        // step 3: swap the entity prefix for the URL-friendly replacement
        return CmsStringUtil.substitute(plusProtected, ENTITY_PREFIX, ENTITY_REPLACEMENT);
    }

    /**
     * Encodes a String in a way that is compatible with the JavaScript escape function.<p>
     * 
     * The String is passed through {@link #encode(String, String)} first, 
     * afterwards every "+" in the result is replaced with "%20".<p>
     * 
     * @param source the text to be encoded
     * @param encoding the encoding type
     * @return the JavaScript escaped String
     */
    public static String escape(String source, String encoding) {

        String urlEncoded = encode(source, encoding);
        // the standard encode call turns the blank into "+" and not into "%20"
        return CmsStringUtil.substitute(urlEncoded, "+", "%20");
    }

    /**
     * Escapes special characters in a HTML-String with their number-based 
     * entity representation, for example &amp; becomes &amp;#38;.<p>
     * 
     * A character <code>ch</code> is replaced if<br>
     * <code>((ch != 32) &amp;&amp; ((ch &gt; 122) || (ch &lt; 48) || (ch == 60) || (ch == 62)))</code><p>
     * 
     * Entities that are already escaped are copied to the result unchanged. 
     * Recognized as already escaped are decimal numeric entities like <code>&amp;#8364;</code> 
     * and the named entities <code>&amp;lt;</code>, <code>&amp;gt;</code>, 
     * <code>&amp;amp;</code> and <code>&amp;quot;</code>.<p>
     * 
     * @param source the String to escape
     * @return the escaped String, or <code>null</code> if the source was <code>null</code>
     * 
     * @see #escapeXml(String)
     */
    public static String escapeHtml(String source) {

        if (source == null) {
            return null;
        }
        int length = source.length();
        StringBuffer result = new StringBuffer(length * 2);
        for (int i = 0; i < length; i++) {
            int ch = source.charAt(i);
            // avoid escaping already escaped characters
            if (ch == '&') {
                int terminatorIndex = source.indexOf(";", i);
                // the named entity for the double quote is "quot" (the former pattern used 
                // "quote", so an existing "&quot;" was escaped a second time)
                if ((terminatorIndex > 0)
                    && source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quot")) {
                    // copy the complete entity including the leading "&" and the trailing ";"
                    result.append(source.substring(i, terminatorIndex + 1));
                    // skip the remaining chars up to (and including) ";"
                    i = terminatorIndex;
                    continue;
                }
            }
            if ((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) {
                // "ch" is an int here, so the decimal char code is appended
                result.append(ENTITY_PREFIX);
                result.append(ch);
                result.append(";");
            } else {
                result.append((char)ch);
            }
        }
        return result.toString();
    }

    /**
     * Escapes all characters with a code above 255 in a HTML-String with their 
     * number-based entity representation.<p>
     * 
     * A character <code>ch</code> is replaced if<br>
     * <code>(ch &gt; 255)</code><br>
     * all other characters (including <code>&amp;</code>, <code>&lt;</code> and 
     * <code>&gt;</code>) are copied unchanged.<p>
     * 
     * @param source the String to escape
     * @return the escaped String, or <code>null</code> if the source was <code>null</code>
     * 
     * @see #escapeXml(String)
     */
    public static String escapeNonAscii(String source) {

        if (source == null) {
            return null;
        }
        char[] chars = source.toCharArray();
        StringBuffer escaped = new StringBuffer(chars.length * 2);
        for (int pos = 0; pos < chars.length; pos++) {
            char current = chars[pos];
            if (current <= 255) {
                // char is in the range that is kept as it is
                escaped.append(current);
            } else {
                // entity prefix, decimal char code, terminating semicolon
                escaped.append(ENTITY_PREFIX).append((int)current).append(";");
            }
        }
        return escaped.toString();
    }

    /**
     * Encodes a String in a way that is compatible with the JavaScript escape function.
     * Multiple blanks are encoded as multiple "%20" sequences, one for each blank.<p>
     * 
     * An empty source (according to <code>CmsStringUtil.isEmpty(String)</code>) 
     * is returned unchanged.<p>
     * 
     * @param source the text to be encoded
     * @param encoding the encoding type
     * @return the JavaScript escaped String
     */
    public static String escapeWBlanks(String source, String encoding) {

        if (CmsStringUtil.isEmpty(source)) {
            return source;
        }
        StringBuffer escaped = new StringBuffer(source.length() * 2);

        // URL encode the text String first, this produces a very similar encoding 
        // to the JavaScript encoding, except that the blank is encoded as "+" instead of "%20"
        String urlEncoded = encode(source, encoding);

        // copy the text between the "+" signs and put a "%20" in place of every "+"
        int segmentStart = 0;
        int plusIndex = urlEncoded.indexOf('+');
        while (plusIndex >= 0) {
            escaped.append(urlEncoded.substring(segmentStart, plusIndex));
            escaped.append("%20");
            segmentStart = plusIndex + 1;
            plusIndex = urlEncoded.indexOf('+', segmentStart);
        }
        escaped.append(urlEncoded.substring(segmentStart));
        return escaped.toString();
    }

    /**
     * Escapes a String so it may be printed as text content or attribute
     * value in a HTML page or an XML file.<p>
     * 
     * This is a shortcut for <code>escapeXml(source, false)</code>, 
     * see {@link #escapeXml(String, boolean)} for the characters that are replaced.<p>
     * 
     * @param source the string to escape
     * @return the escaped string
     * 
     * @see #escapeHtml(String)
     */
    public static String escapeXml(String source) {

        // delegate with the "double escape" option switched off
        final boolean doubleEscape = false;
        return escapeXml(source, doubleEscape);
    }

    /**
     * Escapes a String so it may be printed as text content or attribute
     * value in a HTML page or an XML file.<p>
     * 
     * This method replaces the following characters in a String:
     * <ul>
     * <li><b>&lt;</b> with &amp;lt;
     * <li><b>&gt;</b> with &amp;gt;
     * <li><b>&amp;</b> with &amp;amp;
     * <li><b>&quot;</b> with &amp;quot;
     * </ul>
     * 
     * If <code>doubleEscape</code> is <code>false</code>, an <b>&amp;</b> that starts a 
     * decimal numeric entity like <code>&amp;#8364;</code> is kept unchanged. 
     * Every other <b>&amp;</b> (also one that starts a named entity) is replaced.<p>
     * 
     * @param source the string to escape
     * @param doubleEscape if <code>false</code>, decimal numeric entities that already are escaped are left untouched
     * 
     * @return the escaped string, or <code>null</code> if the source was <code>null</code>
     * 
     * @see #escapeHtml(String)
     */
    public static String escapeXml(String source, boolean doubleEscape) {

        if (source == null) {
            return null;
        }
        int length = source.length();
        StringBuffer escaped = new StringBuffer(length * 2);

        for (int pos = 0; pos < length; pos++) {
            char current = source.charAt(pos);
            if (current == '<') {
                escaped.append("&lt;");
            } else if (current == '>') {
                escaped.append("&gt;");
            } else if (current == '"') {
                escaped.append("&quot;");
            } else if (current == '&') {
                // don't escape already escaped international and special characters
                boolean numericEntity = false;
                if (!doubleEscape) {
                    int terminatorIndex = source.indexOf(";", pos);
                    numericEntity = (terminatorIndex > 0)
                        && source.substring(pos + 1, terminatorIndex).matches("#[0-9]+");
                }
                if (numericEntity) {
                    // only the "&" is written here, the rest of the entity 
                    // is copied by the following loop iterations
                    escaped.append(current);
                } else {
                    escaped.append("&amp;");
                }
            } else {
                escaped.append(current);
            }
        }
        return escaped.toString();
    }

    /**
     * Checks if a given encoding name is actually supported, and if so
     * resolves it to its canonical name, if not it returns the given fallback 
     * value.<p> 
     * 
     * Charsets have a set of aliases. For example, valid aliases for "UTF-8"
     * are "UTF8", "utf-8" or "utf8". This method resolves any given valid charset name 
     * to its "canonical" form, so that simple String comparison can be used
     * when checking charset names internally later.<p>
     * 
     * Names that have been resolved successfully are stored in an internal cache, 
     * names that could not be resolved are not cached.<p>
     * 
     * Please see <a href="http://www.iana.org/assignments/character-sets">http://www.iana.org/assignments/character-sets</a> 
     * for a list of valid charset alias names.<p>
     * 
     * @param encoding the encoding to check and resolve
     * @param fallback the fallback encoding scheme
     * @return the resolved encoding name, or the fallback value
     */
    public static String lookupEncoding(String encoding, String fallback) {

        String cached = (String)m_encodingCache.get(encoding);
        if (cached == null) {
            try {
                String canonical = Charset.forName(encoding).name();
                m_encodingCache.put(encoding, canonical);
                return canonical;
            } catch (Throwable t) {
                // any failure while resolving the name means the fallback value is used
                return fallback;
            }
        }
        return cached;
    }

    /**
     * Re-decodes a String that has not been correctly decoded and thus has scrambled
     * character bytes.<p>
     * 
     * This is an equivalent to the JavaScript "decodeURIComponent" function.
     * It converts from the default "UTF-8" to the currently selected system encoding.<p>
     * 
     * @param input the String to convert
     * @return the converted String, or <code>null</code> if the input was <code>null</code>
     */
    public static String redecodeUriComponent(String input) {

        if (input == null) {
            return null;
        }
        // NOTE(review): getBytes() and new String(byte[]) without a charset argument both use 
        // the platform default charset of the JVM - confirm that this is intended here
        byte[] scrambled = input.getBytes();
        String systemEncoding = OpenCms.getSystemInfo().getDefaultEncoding();
        byte[] converted = changeEncoding(scrambled, ENCODING_UTF_8, systemEncoding);
        return new String(converted);
    }

    /**
     * Decodes a String in a way that is compatible with the JavaScript 
     * unescape function.<p>
     * 
     * Every "+" in the source is replaced with "%20" first, 
     * the result is then passed to {@link #decode(String, String)}.<p>
     * 
     * @param source the String to be decoded
     * @param encoding the encoding type
     * @return the JavaScript unescaped String, or <code>null</code> if the source was <code>null</code>
     */
    public static String unescape(String source, String encoding) {

        if (source == null) {
            return null;
        }
        char[] chars = source.toCharArray();
        StringBuffer prepared = new StringBuffer(chars.length);
        // to use the standard decoder we need to replace '+' with "%20" (space)
        for (int pos = 0; pos < chars.length; pos++) {
            if (chars[pos] != '+') {
                prepared.append(chars[pos]);
            } else {
                prepared.append("%20");
            }
        }
        return decode(prepared.toString(), encoding);
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -