📄 entitiestounicode.java
字号:
// It is in ISOamsn
map.put("nsub", new Character('\u2284')); // not a subset of, U+2284 ISOamsn
map.put("sube", new Character('\u2286')); // subset of or equal to, U+2286 ISOtech
map.put("supe", new Character('\u2287')); // superset of or equal to, U+2287 ISOtech
map.put("oplus", new Character('\u2295')); // circled plus = direct sum, U+2295 ISOamsb
map.put("otimes", new Character('\u2297')); // circled times = vector product, U+2297 ISOamsb
map.put("perp", new Character('\u22a5')); // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
map.put("sdot", new Character('\u22c5')); // dot operator, U+22C5 ISOamsb
// dot operator is NOT the same character as U+00B7 middle dot
// Miscellaneous Technical
map.put("lceil", new Character('\u2308')); // left ceiling = apl upstile, U+2308 ISOamsc
map.put("rceil", new Character('\u2309')); // right ceiling, U+2309 ISOamsc
map.put("lfloor", new Character('\u230a')); // left floor = apl downstile, U+230A ISOamsc
map.put("rfloor", new Character('\u230b')); // right floor, U+230B ISOamsc
map.put("lang", new Character('\u2329')); // left-pointing angle bracket = bra, U+2329 ISOtech
// lang is NOT the same character as U+003C 'less than'
// or U+2039 'single left-pointing angle quotation mark'
map.put("rang", new Character('\u232a')); // right-pointing angle bracket = ket, U+232A ISOtech
// rang is NOT the same character as U+003E 'greater than'
// or U+203A 'single right-pointing angle quotation mark'
// Geometric Shapes
map.put("loz", new Character('\u25ca')); // lozenge, U+25CA ISOpub
// Miscellaneous Symbols
map.put("spades", new Character('\u2660')); // black spade suit, U+2660 ISOpub
// black here seems to mean filled as opposed to hollow
map.put("clubs", new Character('\u2663')); // black club suit = shamrock, U+2663 ISOpub
map.put("hearts", new Character('\u2665')); // black heart suit = valentine, U+2665 ISOpub
map.put("diams", new Character('\u2666')); // black diamond suit, U+2666 ISOpub
// C0 Controls and Basic Latin
map.put("quot", new Character('\u0022')); // quotation mark = APL quote, U+0022 ISOnum
map.put("amp", new Character('\u0026')); // ampersand, U+0026 ISOnum
map.put("apos", new Character('\''));
map.put("lt", new Character('\u003c')); // less-than sign, U+003C ISOnum
map.put("gt", new Character('\u003e')); // greater-than sign, U+003E ISOnum
// Latin Extended-A
map.put("OElig", new Character('\u0152')); // latin capital ligature OE, U+0152 ISOlat2
map.put("oelig", new Character('\u0153')); // latin small ligature oe, U+0153 ISOlat2
// ligature is a misnomer, this is a separate character in some languages
map.put("Scaron", new Character('\u0160')); // latin capital letter S with caron, U+0160 ISOlat2
map.put("scaron", new Character('\u0161')); // latin small letter s with caron, U+0161 ISOlat2
map.put("Yuml", new Character('\u0178')); // latin capital letter Y with diaeresis, U+0178 ISOlat2
// Spacing Modifier Letters
map.put("circ", new Character('\u02c6')); // modifier letter circumflex accent, U+02C6 ISOpub
map.put("tilde", new Character('\u02dc')); // small tilde, U+02DC ISOdia
// General Punctuation
map.put("ensp", new Character('\u2002')); // en space, U+2002 ISOpub
map.put("emsp", new Character('\u2003')); // em space, U+2003 ISOpub
map.put("thinsp", new Character('\u2009')); // thin space, U+2009 ISOpub
map.put("zwnj", new Character('\u200c')); // zero width non-joiner, U+200C NEW RFC 2070
map.put("zwj", new Character('\u200d')); // zero width joiner, U+200D NEW RFC 2070
map.put("lrm", new Character('\u200e')); // left-to-right mark, U+200E NEW RFC 2070
map.put("rlm", new Character('\u200f')); // right-to-left mark, U+200F NEW RFC 2070
map.put("ndash", new Character('\u2013')); // en dash, U+2013 ISOpub
map.put("mdash", new Character('\u2014')); // em dash, U+2014 ISOpub
map.put("lsquo", new Character('\u2018')); // left single quotation mark, U+2018 ISOnum
map.put("rsquo", new Character('\u2019')); // right single quotation mark, U+2019 ISOnum
map.put("sbquo", new Character('\u201a')); // single low-9 quotation mark, U+201A NEW
map.put("ldquo", new Character('\u201c')); // left double quotation mark, U+201C ISOnum
map.put("rdquo", new Character('\u201d')); // right double quotation mark, U+201D ISOnum
map.put("bdquo", new Character('\u201e')); // double low-9 quotation mark, U+201E NEW
map.put("dagger", new Character('\u2020')); // dagger, U+2020 ISOpub
map.put("Dagger", new Character('\u2021')); // double dagger, U+2021 ISOpub
map.put("permil", new Character('\u2030')); // per mille sign, U+2030 ISOtech
map.put("lsaquo", new Character('\u2039')); // single left-pointing angle quotation mark, U+2039 ISO proposed
// lsaquo is proposed but not yet ISO standardized
map.put("rsaquo", new Character('\u203a')); // single right-pointing angle quotation mark, U+203A ISO proposed
// rsaquo is proposed but not yet ISO standardized
map.put("euro", new Character('\u20ac')); // euro sign, U+20AC NEW
}
/**
 * Translates an entity to a unicode character.
 * <p>
 * The name is expected without the leading ampersand and without the
 * trailing semicolon. Three forms are recognised:
 * <ul>
 * <li>a hexadecimal character reference such as <code>#x41</code> or
 * <code>#X41</code>;</li>
 * <li>a decimal character reference such as <code>#65</code>;</li>
 * <li>a named entity such as <code>amp</code>, looked up in the entity
 * map.</li>
 * </ul>
 *
 * @param name the name of the entity
 * @return the corresponding unicode character, or the NUL character (0)
 * when the name is not in the entity map, the number is malformed, or the
 * number does not fit in a single <code>char</code>
 */
public static char decodeEntity(String name) {
    if (name.startsWith("#")) {
        // Numeric character reference: "#x"/"#X" selects base 16, otherwise base 10.
        boolean hex = name.startsWith("#x") || name.startsWith("#X");
        int code;
        try {
            if (hex) {
                code = Integer.parseInt(name.substring(2), 16);
            }
            else {
                code = Integer.parseInt(name.substring(1));
            }
        }
        catch (NumberFormatException nfe) {
            return '\0';
        }
        // A bare (char) cast keeps only the low 16 bits, so a value such as
        // 65601 would silently turn into 'A'. Report anything that does not
        // fit in a char as "unknown" instead of returning a wrong character.
        if (code < 0 || code > 0xFFFF) {
            return '\0';
        }
        return (char) code;
    }
    Character c = (Character) map.get(name);
    if (c == null) {
        return '\0';
    }
    return c.charValue();
}
/**
 * Translates a String with entities (&amp;...;) to a String without entities,
 * replacing the entity with the right (unicode) character.
 * <p>
 * An entity that {@link #decodeEntity(String)} cannot translate is copied to
 * the result unchanged, as is an ampersand that is not followed by a
 * semicolon or that is followed by another ampersand before the next
 * semicolon.
 *
 * @param s the String that may contain entities
 * @return the String with every translatable entity replaced; the very same
 * String instance when it contains no ampersand at all
 */
public static String decodeString(String s) {
    int pos_amp = s.indexOf('&');
    if (pos_amp == -1) return s;
    // Everything before the first ampersand is copied verbatim.
    StringBuffer buf = new StringBuffer(s.substring(0, pos_amp));
    while (true) {
        int pos_sc = s.indexOf(';', pos_amp);
        if (pos_sc == -1) {
            // No terminator left: the remainder cannot hold a complete entity.
            buf.append(s.substring(pos_amp));
            return buf.toString();
        }
        // If another ampersand occurs before the semicolon, the earlier one
        // cannot start an entity: emit it as plain text and restart from the
        // later ampersand.
        int pos_a = s.indexOf('&', pos_amp + 1);
        while (pos_a != -1 && pos_a < pos_sc) {
            buf.append(s.substring(pos_amp, pos_a));
            pos_amp = pos_a;
            pos_a = s.indexOf('&', pos_amp + 1);
        }
        char replace = decodeEntity(s.substring(pos_amp + 1, pos_sc));
        if (replace == '\0') {
            // Unknown entity: keep the original text, including '&' and ';'.
            buf.append(s.substring(pos_amp, pos_sc + 1));
        }
        else {
            buf.append(replace);
        }
        // pos_a is the first ampersand after pos_amp and is known not to lie
        // before pos_sc, so it is also the first ampersand after the
        // semicolon; no second scan is needed.
        pos_amp = pos_a;
        if (pos_amp == -1) {
            buf.append(s.substring(pos_sc + 1));
            return buf.toString();
        }
        buf.append(s.substring(pos_sc + 1, pos_amp));
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -