📄 htmlencoder.java
字号:
package jodd.servlet;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
/**
 * Encodes text strings and URLs to be HTML-safe.
 *
 * <p>HTML encoding is table driven: characters below 256 are looked up in
 * {@link #TABLE_HTML} or {@link #TABLE_HTML_STRICT}; every other character is
 * written as a decimal character reference ({@code &#NNNN;}).
 *
 * <p>Limitation: input is processed one UTF-16 code unit at a time, so a
 * supplementary character (surrogate pair) is emitted as two separate
 * references, one per surrogate.
 */
public final class HtmlEncoder {

    /** Factor applied to the input length when presizing the output buffer. */
    private static final float NEW_SIZE_FACTOR = 1.3f;

    /**
     * Lookup table for use in encode() method.
     *
     * @see #encode
     */
    public static final String[] TABLE_HTML = new String[256];

    /**
     * Lookup table for use in encodeTextXxx() methods.
     *
     * @see #encodeText
     * @see #encodeTextSmart
     * @see #encodeTextStrict
     */
    public static final String[] TABLE_HTML_STRICT = new String[256];

    static {
        // control characters are written as zero-padded, three-digit decimal references
        for (int i = 0; i < 10; i++) {
            TABLE_HTML[i] = "&#00" + i + ";";
        }
        for (int i = 10; i < 32; i++) {
            TABLE_HTML[i] = "&#0" + i + ";";
        }
        // printable ASCII maps to itself (specials are overridden below)
        for (int i = 32; i < 128; i++) {
            TABLE_HTML[i] = String.valueOf((char) i);
        }
        for (int i = 128; i < 256; i++) {
            TABLE_HTML[i] = "&#" + i + ";";
        }

        // special characters
        TABLE_HTML['\''] = "&#039;";    // apostrophe ('&apos;' is not defined by the HTML 4 specification)
        TABLE_HTML['\"'] = "&quot;";    // double quote
        TABLE_HTML['&'] = "&amp;";      // ampersand
        TABLE_HTML['<'] = "&lt;";       // lower than
        TABLE_HTML['>'] = "&gt;";       // greater than

        // strict table: same as above, plus non-breaking spaces and line breaks
        System.arraycopy(TABLE_HTML, 0, TABLE_HTML_STRICT, 0, 256);
        TABLE_HTML_STRICT[' '] = "&nbsp;";
        TABLE_HTML_STRICT['\n'] = "<br>";   // ascii 10
        TABLE_HTML_STRICT['\r'] = "<br>";   // ascii 13
    }

    // ---------------------------------------------------------------- encoding

    /**
     * Appends the encoded form of a single character: the table entry when the
     * character is covered by the table, a decimal character reference otherwise.
     */
    private static void appendEncoded(StringBuffer buffer, String[] table, char c) {
        if (c < table.length) {
            buffer.append(table[c]);
        } else {
            buffer.append("&#").append((int) c).append(';');
        }
    }

    /**
     * Encodes a string to HTML-safe text. Characters outside the printable ASCII
     * range are encoded as decimal character references, and five special
     * characters are replaced with their HTML values:
     * <ul>
     * <li>{@code '} with {@code &#039;}</li>
     * <li>{@code "} with {@code &quot;}</li>
     * <li>{@code &} with {@code &amp;}</li>
     * <li>{@code <} with {@code &lt;}</li>
     * <li>{@code >} with {@code &gt;}</li>
     * </ul>
     *
     * @param string input string; may be {@code null}
     *
     * @return HTML-safe string; empty string for {@code null} or empty input
     * @see #encodeText
     */
    public static String encode(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }
        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        for (int i = 0; i < n; i++) {
            appendEncoded(buffer, TABLE_HTML, string.charAt(i));
        }
        return buffer.toString();
    }

    /**
     * Encodes text into HTML-safe text and preserves its format. In addition to
     * the replacements done by {@link #encode}, the following characters are
     * replaced:
     * <ul>
     * <li>space with {@code &nbsp;}</li>
     * <li>{@code \n} with {@code <br>}</li>
     * <li>{@code \r} with {@code <br>}</li>
     * </ul>
     * A CRLF pair produces a single {@code <br>}, so both CRLF and LF texts are
     * handled.
     *
     * <p>A common problem with this method is that the resulting spaces are not
     * breakable, so they may break the layout of the page.
     *
     * @param string input string; may be {@code null}
     *
     * @return HTML-safe text; empty string for {@code null} or empty input
     */
    public static String encodeTextStrict(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }
        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        char c = 0, prev = 0;
        for (int i = 0; i < n; i++, prev = c) {
            c = string.charAt(i);
            if ((c == '\n') && (prev == '\r')) {
                continue;       // previously '\r' (CR) was encoded, so skip '\n' (LF)
            }
            appendEncoded(buffer, TABLE_HTML_STRICT, c);
        }
        return buffer.toString();
    }

    /**
     * Encodes text into HTML-safe text and preserves its format except for
     * spaces, which are left as plain spaces. In addition to the replacements
     * done by {@link #encode}, the following characters are replaced:
     * <ul>
     * <li>{@code \n} with {@code <br>}</li>
     * <li>{@code \r} with {@code <br>}</li>
     * </ul>
     * A CRLF pair produces a single {@code <br>}, so both CRLF and LF texts are
     * handled.
     *
     * @param string input string; may be {@code null}
     *
     * @return HTML-safe text; empty string for {@code null} or empty input
     */
    public static String encodeText(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }
        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        char c = 0, prev = 0;
        for (int i = 0; i < n; i++, prev = c) {
            c = string.charAt(i);
            if (c == ' ') {
                buffer.append(' ');     // bypass the strict table, which would emit &nbsp;
                continue;
            }
            if ((c == '\n') && (prev == '\r')) {
                continue;       // previously '\r' (CR) was encoded, so skip '\n' (LF)
            }
            appendEncoded(buffer, TABLE_HTML_STRICT, c);
        }
        return buffer.toString();
    }

    /**
     * Encodes text into HTML-safe text and preserves its format using smart
     * spaces. In addition to the replacements done by {@link #encode}, the
     * following characters are replaced:
     * <ul>
     * <li>{@code \n} with {@code <br>}</li>
     * <li>{@code \r} with {@code <br>}</li>
     * </ul>
     * A CRLF pair produces a single {@code <br>}, so both CRLF and LF texts are
     * handled.
     *
     * <p>Within a run of consecutive spaces the output alternates between a
     * plain space and {@code &nbsp;}, starting with a plain space. The width of
     * the run is preserved while line breaks remain available.
     *
     * @param string input string; may be {@code null}
     *
     * @return HTML-safe text; empty string for {@code null} or empty input
     */
    public static String encodeTextSmart(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }
        int n = string.length();
        StringBuffer buffer = new StringBuffer((int) (n * NEW_SIZE_FACTOR));
        char c = 0, prev = 0;
        boolean prevSpace = false;
        for (int i = 0; i < n; i++, prev = c) {
            c = string.charAt(i);
            if (c == ' ') {
                if (prev != ' ') {
                    prevSpace = false;      // a new run of spaces starts with a plain space
                }
                if (!prevSpace) {
                    buffer.append(' ');
                } else {
                    buffer.append("&nbsp;");
                }
                prevSpace = !prevSpace;
                continue;
            }
            if ((c == '\n') && (prev == '\r')) {
                continue;       // previously '\r' (CR) was encoded, so skip '\n' (LF)
            }
            appendEncoded(buffer, TABLE_HTML_STRICT, c);
        }
        return buffer.toString();
    }

    // ---------------------------------------------------------------- URL encode/decode

    /**
     * URL-encodes a string using the ISO-8859-1 encoding. Null strings are
     * converted to empty ones. Unfortunately, this encoding is not compatible
     * with the JavaScript functions escape/unescape.
     *
     * @param string input string; may be {@code null}
     *
     * @return URL-encoded string
     */
    public static String encodeUrl(String string) {
        return encodeUrl(string, "ISO-8859-1");
    }

    /**
     * URL-encodes a string using the given encoding. Null strings are converted
     * to empty ones.
     *
     * @param string input string; may be {@code null}
     * @param encoding name of the character encoding to use
     *
     * @return URL-encoded string, or {@code null} if the encoding is not supported
     */
    public static String encodeUrl(String string, String encoding) {
        if (string == null) {
            return "";
        }
        try {
            return URLEncoder.encode(string, encoding);
        } catch (UnsupportedEncodingException e) {
            // existing contract: an unsupported encoding is reported as null, not as an exception
            return null;
        }
    }

    /**
     * Decodes a URL-encoded string using the ISO-8859-1 encoding. Null strings
     * are converted to empty ones.
     *
     * @param string input; may be {@code null}
     *
     * @return decoded string
     */
    public static String decodeUrl(String string) {
        return decodeUrl(string, "ISO-8859-1");
    }

    /**
     * Decodes a URL-encoded string using the given encoding. Null strings are
     * converted to empty ones.
     *
     * @param string input; may be {@code null}
     * @param encoding name of the character encoding to use
     *
     * @return decoded string, or {@code null} if the encoding is not supported
     */
    public static String decodeUrl(String string, String encoding) {
        if (string == null) {
            return "";
        }
        try {
            return URLDecoder.decode(string, encoding);
        } catch (UnsupportedEncodingException e) {
            // existing contract: an unsupported encoding is reported as null, not as an exception
            return null;
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -