📄 htmldecoder.java
字号:
package com.laozizhu.search.util;
import java.util.HashMap;
/**
* Replaces HTML character references in a string, e.g. {@code &lt; &gt; &quot; &aring; &#1048; &#x6C34;}.
*
* @author 老紫竹(laozizhu.com)
*/
public class HTMLDecoder {
/**
 * Lookup table from an entity name (without the leading '&' and the trailing ';',
 * e.g. "amp") to the character it stands for. It is filled by the static
 * initializer of this class and read by {@link #decode(String)}.
 * NOTE(review): the map is public and mutable, so callers can alter what decode() returns.
 */
public static final HashMap<String, Character> charTable;
/**
 * Replaces HTML character references in {@code s} with the characters they denote.
 *
 * <p>Three forms are recognised, each of which must end with ';':
 * <ul>
 * <li>named references whose name (a letter followed by letters or digits) is a key
 *     of {@code charTable};</li>
 * <li>decimal references: '&amp;', '#', one or more digits;</li>
 * <li>hexadecimal references: '&amp;', '#', 'x' or 'X', one or more hex digits.</li>
 * </ul>
 * Anything that does not match one of these forms, names an unknown entity, or
 * carries a number that is not a valid Unicode code point is copied to the result
 * unchanged. Numeric references above U+FFFF are emitted as a surrogate pair.
 *
 * @param s the text to decode; must not be {@code null}
 * @return a new string with every recognised reference replaced
 * @throws NullPointerException if {@code s} is {@code null}
 */
public static String decode(String s) {
    int maxPos = s.length();
    StringBuilder sb = new StringBuilder(maxPos);
    int curPos = 0;
    while (curPos < maxPos) {
        char c = s.charAt(curPos++);
        if (c == '&') {
            int next = decodeReferenceAt(s, curPos, sb);
            if (next >= 0) {
                // A complete reference was decoded and appended; resume after its ';'.
                curPos = next;
                continue;
            }
        }
        // Ordinary character, or an '&' that does not start a decodable reference.
        sb.append(c);
    }
    return sb.toString();
}

/**
 * Tries to decode the reference whose text starts at {@code start}, the index just
 * after an '&amp;'. On success the decoded character is appended to {@code out}.
 *
 * @return the index just after the terminating ';', or -1 if nothing was appended
 */
private static int decodeReferenceAt(String s, int start, StringBuilder out) {
    int maxPos = s.length();
    if (start >= maxPos) {
        return -1;
    }
    char first = s.charAt(start);
    if (first == '#') {
        return decodeNumericReferenceAt(s, start + 1, out);
    }
    if (!isLetter(first)) {
        return -1;
    }
    int pos = start + 1;
    while ((pos < maxPos) && isLetterOrDigit(s.charAt(pos))) {
        pos++;
    }
    // The name must be terminated by ';' inside the string.
    if ((pos >= maxPos) || (s.charAt(pos) != ';')) {
        return -1;
    }
    Character ch = charTable.get(s.substring(start, pos));
    if (ch == null) {
        return -1;
    }
    out.append(ch.charValue());
    return pos + 1;
}

/**
 * Tries to decode a numeric reference whose text starts at {@code start}, the index
 * just after the '#'. On success the code point is appended to {@code out}.
 *
 * @return the index just after the terminating ';', or -1 if nothing was appended
 */
private static int decodeNumericReferenceAt(String s, int start, StringBuilder out) {
    int maxPos = s.length();
    int radix = 10;
    int digitsStart = start;
    if (digitsStart < maxPos) {
        char marker = s.charAt(digitsStart);
        if ((marker == 'x') || (marker == 'X')) {
            radix = 16;
            digitsStart++;
        }
    }
    int pos = digitsStart;
    while (pos < maxPos) {
        char d = s.charAt(pos);
        boolean accepted = (radix == 16) ? isHexDigit(d) : isDigit(d);
        if (!accepted) {
            break;
        }
        pos++;
    }
    // Require at least one digit and a terminating ';' inside the string.
    if ((pos == digitsStart) || (pos >= maxPos) || (s.charAt(pos) != ';')) {
        return -1;
    }
    int codePoint;
    try {
        codePoint = Integer.parseInt(s.substring(digitsStart, pos), radix);
    } catch (NumberFormatException ignored) {
        // Too many digits to fit in an int: leave the text undecoded.
        return -1;
    }
    if (!Character.isValidCodePoint(codePoint)) {
        return -1;
    }
    // appendCodePoint writes one char for the BMP and a surrogate pair above it.
    out.appendCodePoint(codePoint);
    return pos + 1;
}
/** Returns true when {@code c} is accepted by {@code isLetter} or by {@code isDigit}. */
private static boolean isLetterOrDigit(char c) {
    if (isLetter(c)) {
        return true;
    }
    return isDigit(c);
}
/** Returns true when {@code c} is accepted by {@code isHexLetter} or by {@code isDigit}. */
private static boolean isHexDigit(char c) {
    if (isHexLetter(c)) {
        return true;
    }
    return isDigit(c);
}
/** Returns true when {@code c} is an ASCII letter, 'a'..'z' or 'A'..'Z'. */
private static boolean isLetter(char c) {
    if ((c >= 'a') && (c <= 'z')) {
        return true;
    }
    return ('A' <= c) && (c <= 'Z');
}
/** Returns true when {@code c} is one of the hexadecimal letters 'a'..'f' or 'A'..'F'. */
private static boolean isHexLetter(char c) {
    return "abcdefABCDEF".indexOf(c) >= 0;
}
/** Returns true when {@code c} is an ASCII decimal digit, '0'..'9'. */
private static boolean isDigit(char c) {
    int offset = c - '0';
    return (offset >= 0) && (offset <= 9);
}
/**
 * Collapses every run of consecutive whitespace characters (as defined by
 * {@link #isWhitespace(char)}) into a single space; all other characters are
 * copied unchanged.
 *
 * @param s the text to compact; must not be {@code null}
 * @return the compacted text
 */
public static String compact(String s) {
    final int length = s.length();
    StringBuilder out = new StringBuilder(length);
    boolean insideRun = false;
    for (int i = 0; i < length; i++) {
        char ch = s.charAt(i);
        if (!isWhitespace(ch)) {
            out.append(ch);
            insideRun = false;
        } else if (!insideRun) {
            // Only the first character of a whitespace run produces output.
            out.append('\u0020');
            insideRun = true;
        }
    }
    return out.toString();
}
/**
 * Tells whether {@code ch} counts as white space for this class. HTML is very
 * particular about what constitutes white space, so only these six characters
 * qualify: space, tab, line feed, form feed, carriage return and the zero-width
 * space (U+200B).
 *
 * @param ch the character to test
 * @return true if {@code ch} is one of the characters listed above
 */
public static boolean isWhitespace(char ch) {
    switch (ch) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
        case '\u200b':
            return true;
        default:
            return false;
    }
}
static {
charTable = new HashMap<String, Character>();
charTable.put("quot", new Character((char) 34));
charTable.put("amp", new Character((char) 38));
charTable.put("apos", new Character((char) 39));
charTable.put("lt", new Character((char) 60));
charTable.put("gt", new Character((char) 62));
charTable.put("nbsp", new Character(' '));
charTable.put("iexcl", new Character((char) 161));
charTable.put("cent", new Character((char) 162));
charTable.put("pound", new Character((char) 163));
charTable.put("curren", new Character((char) 164));
charTable.put("yen", new Character((char) 165));
charTable.put("brvbar", new Character((char) 166));
charTable.put("sect", new Character((char) 167));
charTable.put("uml", new Character((char) 168));
charTable.put("copy", new Character((char) 169));
charTable.put("ordf", new Character((char) 170));
charTable.put("laquo", new Character((char) 171));
charTable.put("not", new Character((char) 172));
charTable.put("shy", new Character((char) 173));
charTable.put("reg", new Character((char) 174));
charTable.put("macr", new Character((char) 175));
charTable.put("deg", new Character((char) 176));
charTable.put("plusmn", new Character((char) 177));
charTable.put("sup2", new Character((char) 178));
charTable.put("sup3", new Character((char) 179));
charTable.put("acute", new Character((char) 180));
charTable.put("micro", new Character((char) 181));
charTable.put("para", new Character((char) 182));
charTable.put("middot", new Character((char) 183));
charTable.put("cedil", new Character((char) 184));
charTable.put("sup1", new Character((char) 185));
charTable.put("ordm", new Character((char) 186));
charTable.put("raquo", new Character((char) 187));
charTable.put("frac14", new Character((char) 188));
charTable.put("frac12", new Character((char) 189));
charTable.put("frac34", new Character((char) 190));
charTable.put("iquest", new Character((char) 191));
charTable.put("Agrave", new Character((char) 192));
charTable.put("Aacute", new Character((char) 193));
charTable.put("Acirc", new Character((char) 194));
charTable.put("Atilde", new Character((char) 195));
charTable.put("Auml", new Character((char) 196));
charTable.put("Aring", new Character((char) 197));
charTable.put("AElig", new Character((char) 198));
charTable.put("Ccedil", new Character((char) 199));
charTable.put("Egrave", new Character((char) 200));
charTable.put("Eacute", new Character((char) 201));
charTable.put("Ecirc", new Character((char) 202));
charTable.put("Euml", new Character((char) 203));
charTable.put("Igrave", new Character((char) 204));
charTable.put("Iacute", new Character((char) 205));
charTable.put("Icirc", new Character((char) 206));
charTable.put("Iuml", new Character((char) 207));
charTable.put("ETH", new Character((char) 208));
charTable.put("Ntilde", new Character((char) 209));
charTable.put("Ograve", new Character((char) 210));
charTable.put("Oacute", new Character((char) 211));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -