📄 cmsencoder.java
字号:
/*
* File : $Source: /usr/local/cvs/opencms/src/org/opencms/i18n/CmsEncoder.java,v $
* Date : $Date: 2006/07/20 13:46:39 $
* Version: $Revision: 1.20 $
*
* This library is part of OpenCms -
* the Open Source Content Mananagement System
*
* Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.i18n;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.util.CmsStringUtil;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
/**
* The OpenCms CmsEncoder class provides static methods to decode and encode data.<p>
*
* The methods in this class are substitutes for <code>java.net.URLEncoder.encode()</code> and
* <code>java.net.URLDecoder.decode()</code>. Use the methods from this class in all OpenCms
* core classes to ensure the encoding is always handled the same way.<p>
*
* The de- and encoding uses the same coding mechanism as JavaScript: special characters are
* replaced with <code>%hex</code>, where hex is a two-digit hex number.<p>
*
* <b>Note:</b> On the client side (browser), instead of using the corresponding <code>escape</code>
* and <code>unescape</code> JavaScript functions, better use the <code>encodeURIComponent</code> and
* <code>decodeURIComponent</code> functions, which work properly with Unicode characters.
* These functions are supported in IE 5.5+ and NS 6+ only.<p>
*
* @author Alexander Kandzior
*
* @version $Revision: 1.20 $
*
* @since 6.0.0
*/
public final class CmsEncoder {
/** Constant for the standard <code>ISO-8859-1</code> encoding name. */
public static final String ENCODING_ISO_8859_1 = "ISO-8859-1";
/** Constant for the standard <code>US-ASCII</code> encoding name. */
public static final String ENCODING_US_ASCII = "US-ASCII";
/**
* Constant for the standard <code>UTF-8</code> encoding name.<p>
*
* According to the W3C standard, <code>UTF-8</code> is the default encoding of the
* JavaScript <code>decodeURIComponent</code> function. The <code>decode</code> and
* <code>encode</code> methods of this class use it as their fallback encoding.
*/
public static final String ENCODING_UTF_8 = "UTF-8";
/**
* The regex pattern to match HTML entities.<p>
*
* Only decimal numeric references of the form <code>&amp;#NNN;</code> are matched;
* hexadecimal and named entities are not. The spelling of the identifier is kept
* unchanged because the constant is referenced by name from the methods of this class.
*/
private static final Pattern ENTITIY_PATTERN = Pattern.compile("\\&#\\d+;");
/** The prefix that starts a numeric HTML entity. */
private static final String ENTITY_PREFIX = "&#";
/**
* The replacement for the HTML entity prefix in parameters.<p>
*
* {@link #decodeParameter(String)} substitutes this marker back to the entity prefix
* before decoding the entities.
*/
private static final String ENTITY_REPLACEMENT = "$$";
/** The log object for this class. */
private static final Log LOG = CmsLog.getLog(CmsEncoder.class);
/**
* A cache for encoding name lookup.<p>
*
* NOTE(review): this is a plain <code>HashMap</code> held in a static field; how concurrent
* access is handled is not visible from this declaration - confirm at the places where it is used.
*/
private static Map m_encodingCache = new HashMap(16);
/** The plus entity, that is the numeric HTML entity for the <code>+</code> character (decimal 43). */
private static final String PLUS_ENTITY = ENTITY_PREFIX + "043;";
/**
* Hidden constructor.<p>
*
* It is declared private, so no instances of this class can be created from outside the class.
*/
private CmsEncoder() {
// intentionally empty
}
/**
 * Normalizes the HTML entities of the given String for the given charset.<p>
 *
 * The input is first passed through {@link #decodeHtmlEntities(String, String)} and the
 * decoded result is then passed through {@link #encodeHtmlEntities(String, String)},
 * both using the same encoding.<p>
 *
 * @param input the input to adjust the HTML encoding for
 * @param encoding the charset to encode the result with
 * @return the input with the decoded/encoded HTML entities
 */
public static String adjustHtmlEncoding(String input, String encoding) {
    // step 1: turn entities into plain characters where the decoder decides to do so
    String decoded = decodeHtmlEntities(input, encoding);
    // step 2: re-encode the decoded text for the same charset
    return encodeHtmlEntities(decoded, encoding);
}
/**
 * Changes the encoding of a byte array that represents a String.<p>
 *
 * The input array itself is returned unchanged in the following cases:
 * <ul>
 * <li>the input array is <code>null</code></li>
 * <li>one of the encoding names is <code>null</code></li>
 * <li>both encoding names are equal, ignoring case and surrounding whitespace</li>
 * <li>one of the encodings is not supported by the Java VM</li>
 * </ul>
 *
 * @param input the byte array to convert
 * @param oldEncoding the current encoding of the byte array
 * @param newEncoding the new encoding of the byte array
 * @return byte[] the byte array encoded in the new encoding, or the unchanged input
 */
public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) {
    if ((input == null) || (oldEncoding == null) || (newEncoding == null)) {
        // nothing to convert, or not enough information to convert
        return input;
    }
    // use the trimmed names for the comparison AND for the conversion, otherwise a name
    // with surrounding whitespace would pass the comparison but fail the charset lookup
    String sourceEncoding = oldEncoding.trim();
    String targetEncoding = newEncoding.trim();
    if (sourceEncoding.equalsIgnoreCase(targetEncoding)) {
        return input;
    }
    try {
        return (new String(input, sourceEncoding)).getBytes(targetEncoding);
    } catch (UnsupportedEncodingException e) {
        // deliberate best effort: an unsupported encoding leaves the input untouched
        return input;
    }
}
/**
 * Creates a String out of a byte array with the specified encoding, falling back
 * to the system default in case the encoding name is not valid.<p>
 *
 * Use this method as a replacement for <code>new String(byte[], encoding)</code>
 * to avoid possible encoding problems.<p>
 *
 * If the encoding name is <code>null</code>, or the encoding lookup yields <code>null</code>
 * for it, a warning that contains the requested name is logged and the default encoding
 * from the OpenCms system info is used instead.<p>
 *
 * @param bytes the bytes to decode
 * @param encoding the encoding scheme to use for decoding the bytes
 * @return the bytes decoded to a String, or <code>null</code> in the (practically unreachable)
 *      case that the finally selected encoding is rejected by the Java VM
 */
public static String createString(byte[] bytes, String encoding) {
    String defaultEncoding = OpenCms.getSystemInfo().getDefaultEncoding();
    // keep the requested name in "encoding" so it is still available for the log message
    String resolved;
    if (encoding == null) {
        // no encoding given at all, use the default
        resolved = null;
    } else if (encoding.equals(defaultEncoding)) {
        // the default encoding is used as it is, without a lookup
        resolved = encoding;
    } else {
        resolved = lookupEncoding(encoding, null);
    }
    if (resolved == null) {
        if (LOG.isWarnEnabled()) {
            // report the name that was requested, not the (null) lookup result
            LOG.warn(Messages.get().getBundle().key(Messages.ERR_UNSUPPORTED_VM_ENCODING_1, encoding));
        }
        resolved = defaultEncoding;
    }
    try {
        return new String(bytes, resolved);
    } catch (UnsupportedEncodingException e) {
        // not expected in practice: the name was either looked up first or is the system default
        LOG.error(Messages.get().getBundle().key(Messages.ERR_ENCODING_ISSUES_1, resolved), e);
        return null;
    }
}
/**
 * Decodes a String using UTF-8 encoding, which is the standard for http data transmission
 * with GET and POST requests.<p>
 *
 * This is a shortcut for {@link #decode(String, String)} with {@link #ENCODING_UTF_8}
 * as encoding.<p>
 *
 * @param source the String to decode
 * @return String the decoded source String
 */
public static String decode(String source) {
    final String decoded = decode(source, ENCODING_UTF_8);
    return decoded;
}
/**
 * This method is a substitute for <code>URLDecoder.decode()</code>.
 * Use this in all OpenCms core classes to ensure the encoding is
 * always handled the same way.<p>
 *
 * In case you don't know what encoding to use, set the value of
 * the <code>encoding</code> parameter to <code>null</code>.
 * This method will then use UTF-8 encoding, which is probably the right one.<p>
 *
 * UTF-8 is also tried if the given encoding is not supported. Only the
 * <code>UnsupportedEncodingException</code> is handled here, any other exception
 * raised by <code>URLDecoder</code> is passed on to the caller.<p>
 *
 * @param source The string to decode
 * @param encoding The encoding to use (if null, UTF-8 is used)
 * @return The decoded source String, <code>null</code> if the source is <code>null</code>,
 *      or the unchanged source if no decoding attempt succeeded
 */
public static String decode(String source, String encoding) {
    if (source == null) {
        return null;
    }
    // the requested encoding is tried first (if there is one), UTF-8 is the fallback
    String[] candidates = new String[] {encoding, ENCODING_UTF_8};
    for (int i = 0; i < candidates.length; i++) {
        String candidate = candidates[i];
        if (candidate == null) {
            continue;
        }
        try {
            return URLDecoder.decode(source, candidate);
        } catch (UnsupportedEncodingException e) {
            // continue with the next candidate
        }
    }
    // no candidate worked
    return source;
}
/**
 * Decodes HTML entity references like <code>&#8364;</code> that are contained in the
 * String to a regular character, but only if that character is contained in the given
 * encodings charset.<p>
 *
 * Only decimal numeric entities are handled. An entity is left unchanged in the result if
 * <ul>
 * <li>its character can not be encoded in the given charset</li>
 * <li>its number is larger than <code>Character.MAX_VALUE</code>, so it does not fit into a single char</li>
 * <li>its number is too large to be parsed as an int</li>
 * </ul>
 *
 * @param input the input to decode the HTML entities in
 * @param encoding the charset to decode the input for
 * @return the input with the decoded HTML entities
 * @see #encodeHtmlEntities(String, String)
 */
public static String decodeHtmlEntities(String input, String encoding) {
    Matcher matcher = ENTITIY_PATTERN.matcher(input);
    StringBuffer result = new StringBuffer(input.length());
    Charset charset = Charset.forName(encoding);
    CharsetEncoder encoder = charset.newEncoder();
    while (matcher.find()) {
        String entity = matcher.group();
        // strip the leading "&#" and the trailing ";"
        String value = entity.substring(2, entity.length() - 1);
        // by default the entity is kept as it is
        String replacement = entity;
        try {
            int c = Integer.parseInt(value);
            if (c < 128) {
                // first 128 chars are contained in almost every charset,
                // this is intended as performance improvement since
                // the canEncode() operation appears quite CPU heavy
                replacement = String.valueOf((char)c);
            } else if ((c <= Character.MAX_VALUE) && encoder.canEncode((char)c)) {
                // the value fits into a char and the encoder can encode this char,
                // larger values would be truncated by the cast and so are not decoded
                replacement = String.valueOf((char)c);
            }
        } catch (NumberFormatException e) {
            // the number does not fit into an int, keep the entity unchanged
        }
        // append the decoded text literally: when passed as replacement String, a "$" or "\"
        // (e.g. from "&#36;" or "&#92;") would be interpreted as group reference / escape
        matcher.appendReplacement(result, "");
        result.append(replacement);
    }
    matcher.appendTail(result);
    return result.toString();
}
/**
 * Decodes a string used as parameter in an uri in a way independent of other encodings/decodings applied before.<p>
 *
 * The entity replacement marker is first substituted back to the HTML entity prefix,
 * then the HTML entities are decoded for the default encoding taken from the OpenCms system info.<p>
 *
 * @param input the encoded parameter string
 * @return the decoded parameter string
 * @see #encodeParameter(String)
 */
public static String decodeParameter(String input) {
    // restore the original entity prefix
    String withEntities = CmsStringUtil.substitute(input, ENTITY_REPLACEMENT, ENTITY_PREFIX);
    String defaultEncoding = OpenCms.getSystemInfo().getDefaultEncoding();
    return decodeHtmlEntities(withEntities, defaultEncoding);
}
/**
 * Encodes a String using UTF-8 encoding, which is the standard for http data transmission
 * with GET and POST requests.<p>
 *
 * This is a shortcut for {@link #encode(String, String)} with {@link #ENCODING_UTF_8}
 * as encoding.<p>
 *
 * @param source the String to encode
 * @return String the encoded source String
 */
public static String encode(String source) {
    final String encoded = encode(source, ENCODING_UTF_8);
    return encoded;
}
/**
 * This method is a substitute for <code>URLEncoder.encode()</code>.
 * Use this in all OpenCms core classes to ensure the encoding is
 * always handled the same way.<p>
 *
 * In case you don't know what encoding to use, set the value of
 * the <code>encoding</code> parameter to <code>null</code>.
 * This method will then use UTF-8 encoding, which is probably the right one.<p>
 *
 * UTF-8 is also tried if the given encoding is not supported.<p>
 *
 * @param source the String to encode
 * @param encoding the encoding to use (if null, UTF-8 is used)
 * @return the encoded source String, <code>null</code> if the source is <code>null</code>,
 *      or the unchanged source if no encoding attempt succeeded
 */
public static String encode(String source, String encoding) {
    if (source == null) {
        return null;
    }
    // the requested encoding is tried first (if there is one), UTF-8 is the fallback
    String[] candidates = new String[] {encoding, ENCODING_UTF_8};
    for (int i = 0; i < candidates.length; i++) {
        String candidate = candidates[i];
        if (candidate == null) {
            continue;
        }
        try {
            return URLEncoder.encode(source, candidate);
        } catch (UnsupportedEncodingException e) {
            // continue with the next candidate
        }
    }
    // no candidate worked
    return source;
}
/**
* Encodes all characters that are contained in the String which can not displayed
* in the given encodings charset with HTML entity references
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -