📄 cmsencoder.java
字号:
* like <code>&#8364;</code>.<p>
*
* This is required since a Java String is
* internally always stored as Unicode, meaning it can contain almost every character, but
* the HTML charset used might not support all such characters.<p>
*
* @param input the input to encode for HTML
* @param encoding the charset to encode the result with
* @return the input with the encoded HTML entities
* @see #decodeHtmlEntities(String, String)
*/
public static String encodeHtmlEntities(String input, String encoding) {
    final int length = input.length();
    StringBuffer encoded = new StringBuffer(length * 2);
    CharsetEncoder charsetEncoder = Charset.forName(encoding).newEncoder();
    for (int pos = 0; pos < length; pos++) {
        char current = input.charAt(pos);
        // The "< 128" test is a shortcut: the first 128 chars are contained in almost
        // every charset, and it spares the comparatively CPU heavy canEncode() call.
        boolean representable = (current < 128) || charsetEncoder.canEncode(current);
        if (representable) {
            // the target charset can represent this char, copy it as it is
            encoded.append(current);
            continue;
        }
        // not representable: append a numeric HTML entity reference instead
        encoded.append(ENTITY_PREFIX);
        encoded.append((int)current);
        encoded.append(";");
    }
    return encoded.toString();
}
/**
 * Encodes a String used as a parameter in a URI so that the result does not depend on
 * other encodings / decodings applied later.<p>
 *
 * Used to ensure that GET parameters are not wrecked by wrong or incompatible configuration settings.
 * The encoding is done in three steps:
 * <ol>
 * <li>every character that cannot be encoded in US-ASCII is replaced by a HTML entity,
 * <li>the plus sign is replaced by its entity to avoid problems with the white-space replacer,
 * <li>the entity prefix is replaced with characters not used as delimiters in URLs.
 * </ol>
 *
 * @param input the parameter string
 * @return the encoded parameter string
 */
public static String encodeParameter(String input) {
    // step 1: only US-ASCII characters remain, everything else becomes an entity
    String asciiOnly = CmsEncoder.encodeHtmlEntities(input, CmsEncoder.ENCODING_US_ASCII);
    // step 2: protect literal plus signs
    String plusProtected = CmsStringUtil.substitute(asciiOnly, "+", PLUS_ENTITY);
    // step 3: swap the entity prefix for its URL-safe replacement
    String urlSafe = CmsStringUtil.substitute(plusProtected, ENTITY_PREFIX, ENTITY_REPLACEMENT);
    return urlSafe;
}
/**
 * Encodes a String in a way that is compatible with the JavaScript escape function.<p>
 *
 * The source is passed through <code>encode(String, String)</code> and every
 * <code>+</code> in the result is then replaced by <code>%20</code>.<p>
 *
 * @param source the text to be encoded
 * @param encoding the encoding type
 * @return the JavaScript escaped string
 */
public static String escape(String source, String encoding) {
    String urlEncoded = encode(source, encoding);
    // the standard encode call turns a blank into "+", JavaScript expects "%20"
    return CmsStringUtil.substitute(urlEncoded, "+", "%20");
}
/**
 * Escapes special characters in a HTML String with their number-based
 * entity representation, for example <code>&amp;</code> becomes <code>&amp;#38;</code>.<p>
 *
 * A character <code>ch</code> is replaced if<br>
 * <code>((ch != 32) &amp;&amp; ((ch &gt; 122) || (ch &lt; 48) || (ch == 60) || (ch == 62)))</code><p>
 *
 * An ampersand that already starts an entity is not escaped again. This applies to a numeric
 * reference (<code>#</code> followed by decimal digits) and to the names <code>lt</code>,
 * <code>gt</code>, <code>amp</code> and <code>quot</code>, each terminated by the next
 * semicolon in the source. Such an entity is copied to the result unchanged.
 * The misspelled name <code>quote</code> is still accepted for backward compatibility.<p>
 *
 * @param source the String to escape, may be <code>null</code>
 * @return the escaped String, or <code>null</code> if the source was <code>null</code>
 *
 * @see #escapeXml(String)
 */
public static String escapeHtml(String source) {
    if (source == null) {
        return null;
    }
    StringBuffer result = new StringBuffer(source.length() * 2);
    for (int i = 0; i < source.length(); i++) {
        int ch = source.charAt(i);
        // avoid escaping already escaped characters (38 is the ampersand)
        if (ch == 38) {
            int terminatorIndex = source.indexOf(";", i);
            if (terminatorIndex > 0) {
                // "quot" is the real HTML entity name; "quote" is kept only because
                // earlier versions of this method accepted it
                if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quot|quote")) {
                    result.append(source.substring(i, terminatorIndex + 1));
                    // skip the remaining chars up to (and including) ";"
                    i = terminatorIndex;
                    continue;
                }
            }
        }
        if ((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) {
            // append the decimal char value as a numeric entity
            result.append(ENTITY_PREFIX);
            result.append(ch);
            result.append(";");
        } else {
            result.append((char)ch);
        }
    }
    return result.toString();
}
/**
 * Escapes characters with a char value above 255 in a HTML String with their
 * number-based entity representation.<p>
 *
 * A character <code>ch</code> is replaced if<br>
 * <code>(ch &gt; 255)</code><p>
 *
 * Despite the method name, characters in the range 128 to 255 are copied unchanged.<p>
 *
 * @param source the String to escape, may be <code>null</code>
 * @return the escaped String, or <code>null</code> if the source was <code>null</code>
 *
 * @see #escapeXml(String)
 */
public static String escapeNonAscii(String source) {
    if (source == null) {
        return null;
    }
    final int length = source.length();
    StringBuffer escaped = new StringBuffer(length * 2);
    int pos = 0;
    while (pos < length) {
        char current = source.charAt(pos++);
        if (current <= 255) {
            // copy unchanged
            escaped.append(current);
        } else {
            // replace with a numeric entity holding the decimal char value
            escaped.append(ENTITY_PREFIX);
            escaped.append((int)current);
            escaped.append(";");
        }
    }
    return escaped.toString();
}
/**
 * Encodes a String in a way that is compatible with the JavaScript escape function.
 * Multiple blanks are encoded one by one, each as its own <code>%20</code>.<p>
 *
 * An empty source (as decided by <code>CmsStringUtil.isEmpty</code>) is returned unchanged.<p>
 *
 * @param source the text to be encoded
 * @param encoding the encoding type
 * @return the JavaScript escaped string
 */
public static String escapeWBlanks(String source, String encoding) {
    if (CmsStringUtil.isEmpty(source)) {
        return source;
    }
    StringBuffer escaped = new StringBuffer(source.length() * 2);
    // URL-encode the text first: this is very similar to the JavaScript encoding,
    // except that the blank is encoded into "+" instead of "%20"
    String urlEncoded = encode(source, encoding);
    // copy the stretches between plus signs, writing "%20" for every plus sign
    int segmentStart = 0;
    int plusIndex = urlEncoded.indexOf('+', segmentStart);
    while (plusIndex >= 0) {
        escaped.append(urlEncoded.substring(segmentStart, plusIndex));
        escaped.append("%20");
        segmentStart = plusIndex + 1;
        plusIndex = urlEncoded.indexOf('+', segmentStart);
    }
    escaped.append(urlEncoded.substring(segmentStart));
    return escaped.toString();
}
/**
 * Escapes a String so it may be printed as text content or attribute
 * value in a HTML page or an XML file.<p>
 *
 * This method replaces the following characters in a String:
 * <ul>
 * <li><b>&lt;</b> with &amp;lt;
 * <li><b>&gt;</b> with &amp;gt;
 * <li><b>&amp;</b> with &amp;amp;
 * <li><b>&quot;</b> with &amp;quot;
 * </ul>
 *
 * This is a shortcut for {@link #escapeXml(String, boolean)} with
 * <code>doubleEscape</code> set to <code>false</code>.<p>
 *
 * @param source the string to escape
 * @return the escaped string
 *
 * @see #escapeHtml(String)
 */
public static String escapeXml(String source) {
    // the single-argument form never double-escapes
    final boolean doubleEscape = false;
    return escapeXml(source, doubleEscape);
}
/**
* Escapes a String so it may be printed as text content or attribute
* value in a HTML page or an XML file.<p>
*
* This method replaces the following characters in a String:
* <ul>
* <li><b><</b> with &lt;
* <li><b>></b> with &gt;
* <li><b>&</b> with &amp;
* <li><b>"</b> with &quot;
* </ul>
*
* @param source the string to escape
* @param doubleEscape if <code>false</code>, all entities that already are escaped are left untouched
*
* @return the escaped string
*
* @see #escapeHtml(String)
*/
public static String escapeXml(String source, boolean doubleEscape) {
if (source == null) {
return null;
}
StringBuffer result = new StringBuffer(source.length() * 2);
for (int i = 0; i < source.length(); ++i) {
char ch = source.charAt(i);
switch (ch) {
case '<':
result.append("<");
break;
case '>':
result.append(">");
break;
case '&':
// don't escape already escaped international and special characters
if (!doubleEscape) {
int terminatorIndex = source.indexOf(";", i);
if (terminatorIndex > 0) {
if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+")) {
result.append(ch);
break;
}
}
}
// note that to other "break" in the above "if" block
result.append("&");
break;
case '"':
result.append(""");
break;
default:
result.append(ch);
}
}
return new String(result);
}
/**
 * Checks if a given encoding name is actually supported, and if so
 * resolves it to its canonical name; if not, the given fallback
 * value is returned.<p>
 *
 * Charsets have a set of aliases. For example, valid aliases for "UTF-8"
 * are "UTF8", "utf-8" or "utf8". This method resolves any given valid charset name
 * to its "canonical" form, so that simple String comparison can be used
 * when checking charset names internally later.<p>
 *
 * Successfully resolved names are stored in the encoding cache and served from there
 * on later calls. A failed lookup is not cached.<p>
 *
 * Please see <a href="http://www.iana.org/assignments/character-sets">http://www.iana.org/assignments/character-sets</a>
 * for a list of valid charset alias names.<p>
 *
 * @param encoding the encoding to check and resolve
 * @param fallback the fallback encoding scheme
 * @return the resolved encoding name, or the fallback value
 */
public static String lookupEncoding(String encoding, String fallback) {
    String resolved = (String)m_encodingCache.get(encoding);
    if (resolved == null) {
        try {
            String canonicalName = Charset.forName(encoding).name();
            m_encodingCache.put(encoding, canonicalName);
            resolved = canonicalName;
        } catch (Throwable ignored) {
            // deliberately best-effort: any failure simply yields the fallback value
            resolved = fallback;
        }
    }
    return resolved;
}
/**
 * Re-decodes a String that has not been correctly decoded and thus has scrambled
 * character bytes.<p>
 *
 * This is an equivalent to the JavaScript "decodeURIComponent" function.
 * It converts from the default "UTF-8" to the currently selected system encoding.<p>
 *
 * @param input the String to convert, may be <code>null</code>
 * @return the converted String, or <code>null</code> if the input was <code>null</code>
 */
public static String redecodeUriComponent(String input) {
    if (input != null) {
        // NOTE(review): getBytes() and new String(...) are called without a charset argument
        // here, so both use the platform default charset - confirm this is intended
        byte[] platformBytes = input.getBytes();
        return new String(
            changeEncoding(platformBytes, ENCODING_UTF_8, OpenCms.getSystemInfo().getDefaultEncoding()));
    }
    return null;
}
/**
 * Decodes a String in a way that is compatible with the JavaScript
 * unescape function.<p>
 *
 * Every <code>+</code> in the source is replaced by <code>%20</code> before the
 * result is handed to <code>decode(String, String)</code>.<p>
 *
 * @param source the String to be decoded, may be <code>null</code>
 * @param encoding the encoding type
 * @return the JavaScript unescaped String, or <code>null</code> if the source was <code>null</code>
 */
public static String unescape(String source, String encoding) {
    if (source == null) {
        return null;
    }
    char[] chars = source.toCharArray();
    // to use the standard decoder we need to replace '+' with "%20" (space)
    StringBuffer prepared = new StringBuffer(chars.length);
    for (int pos = 0; pos < chars.length; pos++) {
        if (chars[pos] != '+') {
            prepared.append(chars[pos]);
        } else {
            prepared.append("%20");
        }
    }
    return decode(prepared.toString(), encoding);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -