📄 cmsencoder.java

📁 cms是开源的框架
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * File   : $Source: /usr/local/cvs/opencms/src/org/opencms/i18n/CmsEncoder.java,v $
 * Date   : $Date: 2006/07/20 13:46:39 $
 * Version: $Revision: 1.20 $
 *
 * This library is part of OpenCms -
 * the Open Source Content Mananagement System
 *
 * Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.i18n;

import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.util.CmsStringUtil;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;

/**
 * The OpenCms CmsEncoder class provides static methods to decode and encode data.<p>
 * 
 * The methods in this class are substitutes for <code>java.net.URLEncoder.encode()</code> and
 * <code>java.net.URLDecoder.decode()</code>. Use the methods from this class in all OpenCms 
 * core classes to ensure the encoding is always handled the same way.<p>
 * 
 * The decoding and encoding use the same mechanism as JavaScript: special characters are
 * replaced with <code>%hex</code>, where hex is a two digit hex number.<p>
 * 
 * <b>Note:</b> On the client side (browser), instead of using the corresponding <code>escape</code>
 * and <code>unescape</code> JavaScript functions, better use the <code>encodeURIComponent</code> and
 * <code>decodeURIComponent</code> functions, which work properly with Unicode characters.
 * These functions are supported in IE 5.5+ and NS 6+ only.<p>
 *
 * @author Alexander Kandzior 
 * 
 * @version $Revision: 1.20 $ 
 * 
 * @since 6.0.0 
 */
public final class CmsEncoder {

    /** Name of the standard <code>ISO-8859-1</code> encoding. */
    public static final String ENCODING_ISO_8859_1 = "ISO-8859-1";

    /** Name of the standard <code>US-ASCII</code> encoding. */
    public static final String ENCODING_US_ASCII = "US-ASCII";

    /**
     * Name of the standard <code>UTF-8</code> encoding.<p>
     *
     * This is the encoding the single argument <code>decode</code> and <code>encode</code>
     * methods of this class use, and also the fallback of the two argument variants.
     * According to the original author, UTF-8 is also the default encoding of the JavaScript
     * <code>decodeURIComponent</code> function.
     */
    public static final String ENCODING_UTF_8 = "UTF-8";

    /**
     * The regex pattern to match decimal numeric HTML entities, that is <code>&amp;#</code>
     * followed by one or more digits and a semicolon. Named entities and hexadecimal
     * entities are not matched.
     */
    private static final Pattern ENTITIY_PATTERN = Pattern.compile("\\&#\\d+;");

    /** The prefix of a numeric HTML entity. */
    private static final String ENTITY_PREFIX = "&#";

    /** The text that stands in for the HTML entity prefix inside encoded parameters. */
    private static final String ENTITY_REPLACEMENT = "$$";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsEncoder.class);

    /**
     * A cache for encoding name lookup.
     * NOTE(review): not referenced in the visible part of this file, key and value types
     * as well as thread-safety expectations should be confirmed where it is used.
     */
    private static Map m_encodingCache = new HashMap(16);

    /** The numeric HTML entity for the plus sign (decimal character code 43). */
    private static final String PLUS_ENTITY = ENTITY_PREFIX + "043;";

    /**
     * Hidden constructor.<p>
     *
     * The constructor is private and the class is final, so no instances can be created
     * from outside of this class.<p>
     */
    private CmsEncoder() {

        // intentionally empty, there is no state to initialize
    }

    /**
     * Normalizes the HTML entities in the given String for the given charset.<p>
     *
     * The input is first passed to {@link #decodeHtmlEntities(String, String)} and the
     * result of that call is then passed to {@link #encodeHtmlEntities(String, String)},
     * both times with the same encoding.<p>
     *
     * @param input the input to adjust the HTML encoding for
     * @param encoding the charset to encode the result with
     * @return the input with the decoded/encoded HTML entities
     */
    public static String adjustHtmlEncoding(String input, String encoding) {

        // step 1: turn entities into plain characters where possible
        String decoded = decodeHtmlEntities(input, encoding);
        // step 2: turn the remaining problematic characters back into entities
        return encodeHtmlEntities(decoded, encoding);
    }

    /**
     * Changes the encoding of a byte array that represents a String.<p>
     *
     * The bytes are decoded using <code>oldEncoding</code> and the resulting String is
     * encoded again using <code>newEncoding</code>. Leading and trailing whitespace in both
     * encoding names is ignored.<p>
     *
     * The input array is returned unchanged (same instance) in the following cases:
     * <ul>
     * <li>the input or one of the encoding names is <code>null</code></li>
     * <li>both encoding names are equal, ignoring case and surrounding whitespace</li>
     * <li>one of the encodings is not supported by the Java VM</li>
     * </ul>
     *
     * @param input the byte array to convert
     * @param oldEncoding the current encoding of the byte array
     * @param newEncoding the new encoding of the byte array
     * @return byte[] the byte array encoded in the new encoding
     */
    public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) {

        if ((input == null) || (oldEncoding == null) || (newEncoding == null)) {
            // nothing to convert, or no way to tell how to convert it
            return input;
        }
        // the trimmed names are used for the comparison AND for the conversion, otherwise
        // a name like " UTF-8 " would pass the comparison but fail the charset lookup
        String sourceEncoding = oldEncoding.trim();
        String targetEncoding = newEncoding.trim();
        if (sourceEncoding.equalsIgnoreCase(targetEncoding)) {
            return input;
        }
        try {
            return (new String(input, sourceEncoding)).getBytes(targetEncoding);
        } catch (UnsupportedEncodingException e) {
            // at least one of the encodings is unknown to the VM, keep the input as it is
            return input;
        }
    }

    /**
     * Creates a String out of a byte array with the specified encoding, falling back
     * to the system default in case the encoding name is not valid.<p>
     *
     * Use this method as a replacement for <code>new String(byte[], encoding)</code>
     * to avoid possible encoding problems.<p>
     *
     * If the given encoding name differs from the OpenCms default encoding, it is resolved
     * with <code>lookupEncoding(encoding, null)</code> first. In case that lookup yields
     * <code>null</code>, or the given encoding name is <code>null</code>, a warning that
     * contains the requested name is logged and the default encoding is used instead.<p>
     *
     * @param bytes the bytes to decode
     * @param encoding the encoding scheme to use for decoding the bytes
     * @return the bytes decoded to a String, or <code>null</code> in the (practically
     *      unreachable) case that even the selected encoding is rejected by the VM
     */
    public static String createString(byte[] bytes, String encoding) {

        String defaultEncoding = OpenCms.getSystemInfo().getDefaultEncoding();
        // keep the requested name in "encoding" so that it is still available for logging
        String resolved = encoding;
        if ((encoding != null) && !encoding.equals(defaultEncoding)) {
            // compare by value, identity comparison only works if both Strings are interned
            resolved = lookupEncoding(encoding, null);
        }
        if (resolved != null) {
            try {
                return new String(bytes, resolved);
            } catch (UnsupportedEncodingException e) {
                // this can _never_ happen since the charset was looked up first
            }
        } else {
            if (LOG.isWarnEnabled()) {
                // log the name that was requested, not the (null) result of the lookup
                LOG.warn(Messages.get().getBundle().key(Messages.ERR_UNSUPPORTED_VM_ENCODING_1, encoding));
            }
            resolved = defaultEncoding;
            try {
                return new String(bytes, resolved);
            } catch (UnsupportedEncodingException e) {
                // this can also _never_ happen since the default encoding is always valid
            }
        }
        // this code is unreachable in practice
        LOG.error(Messages.get().getBundle().key(Messages.ERR_ENCODING_ISSUES_1, resolved));
        return null;
    }

    /**
     * Decodes a String using UTF-8 encoding, which is the standard for http data transmission
     * with GET and POST requests.<p>
     *
     * This is a shortcut for {@link #decode(String, String)} with
     * {@link #ENCODING_UTF_8} as encoding.<p>
     *
     * @param source the String to decode
     * @return String the decoded source String
     */
    public static String decode(String source) {

        final String decoded = decode(source, ENCODING_UTF_8);
        return decoded;
    }

    /**
     * This method is a substitute for <code>URLDecoder.decode()</code>.
     * Use this in all OpenCms core classes to ensure the encoding is
     * always handled the same way.<p>
     *
     * The given encoding is tried first. If it is <code>null</code> or not supported,
     * UTF-8 is used instead. If UTF-8 is not supported either, the source String is
     * returned unchanged.<p>
     *
     * @param source the String to decode, if <code>null</code> then <code>null</code> is returned
     * @param encoding the encoding to use (if <code>null</code>, UTF-8 is used)
     * @return the decoded source String
     */
    public static String decode(String source, String encoding) {

        if (source == null) {
            return null;
        }
        // the encodings to try, in order of preference
        String[] candidates = {encoding, ENCODING_UTF_8};
        for (int i = 0; i < candidates.length; i++) {
            String candidate = candidates[i];
            if (candidate == null) {
                // no encoding requested, continue with the fallback
                continue;
            }
            try {
                return URLDecoder.decode(source, candidate);
            } catch (java.io.UnsupportedEncodingException e) {
                // continue with the next candidate
            }
        }
        // no usable encoding found
        return source;
    }

    /**
     * Decodes decimal HTML entity references like <code>&amp;#8364;</code> that are contained
     * in the String to a regular character, but only if that character is contained in the
     * given encodings charset.<p>
     *
     * An entity is left unchanged in the result if
     * <ul>
     * <li>the charset can not encode the referenced character,</li>
     * <li>its value is larger than <code>0xFFFF</code> and so does not fit into a single
     *     Java <code>char</code>, or</li>
     * <li>its value exceeds the <code>int</code> range.</li>
     * </ul>
     *
     * @param input the input to decode the HTML entities in
     * @param encoding the charset to decode the input for
     * @return the input with the decoded HTML entities
     * @see #encodeHtmlEntities(String, String)
     */
    public static String decodeHtmlEntities(String input, String encoding) {

        Matcher matcher = ENTITIY_PATTERN.matcher(input);
        StringBuffer result = new StringBuffer(input.length());
        Charset charset = Charset.forName(encoding);
        CharsetEncoder encoder = charset.newEncoder();

        // The result is assembled by hand instead of using Matcher.appendReplacement(),
        // because that method interprets '$' and '\' in the replacement text, which breaks
        // for entities that decode to exactly these characters (&#36; and &#92;).
        int last = 0;
        while (matcher.find()) {
            String entity = matcher.group();
            String replacement = entity;
            try {
                // strip the leading "&#" and the trailing ";"
                int c = Integer.parseInt(entity.substring(2, entity.length() - 1));
                if (c <= Character.MAX_VALUE) {
                    // first 128 chars are contained in almost every charset, checking them
                    // first is intended as performance improvement since the canEncode()
                    // operation appears quite CPU heavy
                    if ((c < 128) || encoder.canEncode((char)c)) {
                        replacement = String.valueOf((char)c);
                    }
                }
                // values above 0xFFFF would be truncated by the cast to char, keep the entity
            } catch (NumberFormatException e) {
                // the number does not fit into an int, keep the entity
            }
            // copy the text between the previous entity and this one, then the replacement
            result.append(input.substring(last, matcher.start()));
            result.append(replacement);
            last = matcher.end();
        }
        // copy the text after the last entity
        result.append(input.substring(last));
        return result.toString();
    }

    /**
     * Decodes a string used as parameter in an uri in a way independent of other
     * encodings/decodings applied before.<p>
     *
     * The entity replacement text is substituted with the HTML entity prefix first,
     * then the HTML entities in the result are decoded for the OpenCms default encoding.<p>
     *
     * @param input the encoded parameter string
     * @return the decoded parameter string
     * @see #encodeParameter(String)
     */
    public static String decodeParameter(String input) {

        // restore the "&#" prefix of the entities
        String withEntities = CmsStringUtil.substitute(input, ENTITY_REPLACEMENT, ENTITY_PREFIX);
        String systemEncoding = OpenCms.getSystemInfo().getDefaultEncoding();
        return CmsEncoder.decodeHtmlEntities(withEntities, systemEncoding);
    }

    /**
     * Encodes a String using UTF-8 encoding, which is the standard for http data transmission
     * with GET and POST requests.<p>
     *
     * This is a shortcut for {@link #encode(String, String)} with
     * {@link #ENCODING_UTF_8} as encoding.<p>
     *
     * @param source the String to encode
     * @return String the encoded source String
     */
    public static String encode(String source) {

        final String encoded = encode(source, ENCODING_UTF_8);
        return encoded;
    }

    /**
     * This method is a substitute for <code>URLEncoder.encode()</code>.
     * Use this in all OpenCms core classes to ensure the encoding is
     * always handled the same way.<p>
     *
     * The given encoding is tried first. If it is <code>null</code> or not supported,
     * UTF-8 is used instead. If UTF-8 is not supported either, the source String is
     * returned unchanged.<p>
     *
     * @param source the String to encode, if <code>null</code> then <code>null</code> is returned
     * @param encoding the encoding to use (if <code>null</code>, UTF-8 is used)
     * @return the encoded source String
     */
    public static String encode(String source, String encoding) {

        if (source == null) {
            return null;
        }
        // the encodings to try, in order of preference
        String[] candidates = {encoding, ENCODING_UTF_8};
        for (int i = 0; i < candidates.length; i++) {
            String candidate = candidates[i];
            if (candidate == null) {
                // no encoding requested, continue with the fallback
                continue;
            }
            try {
                return URLEncoder.encode(source, candidate);
            } catch (java.io.UnsupportedEncodingException e) {
                // continue with the next candidate
            }
        }
        // no usable encoding found
        return source;
    }

    /**
     * Encodes all characters that are contained in the String which can not displayed 
     * in the given encodings charset with HTML entity references
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -