📄 entitiestounicode.java

📁 iText是一个能够快速产生PDF文件的java类库。iText的java类对于那些要产生包含文本
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
        // It is in ISOamsn
        map.put("nsub", new Character('\u2284')); // not a subset of, U+2284 ISOamsn
        map.put("sube", new Character('\u2286')); // subset of or equal to, U+2286 ISOtech
        map.put("supe", new Character('\u2287')); // superset of or equal to, U+2287 ISOtech
        map.put("oplus", new Character('\u2295')); // circled plus = direct sum, U+2295 ISOamsb
        map.put("otimes", new Character('\u2297')); // circled times = vector product, U+2297 ISOamsb
        map.put("perp", new Character('\u22a5')); // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
        map.put("sdot", new Character('\u22c5')); // dot operator, U+22C5 ISOamsb
        // dot operator is NOT the same character as U+00B7 middle dot
        // Miscellaneous Technical
        map.put("lceil", new Character('\u2308')); // left ceiling = apl upstile, U+2308 ISOamsc
        map.put("rceil", new Character('\u2309')); // right ceiling, U+2309 ISOamsc
        map.put("lfloor", new Character('\u230a')); // left floor = apl downstile, U+230A ISOamsc
        map.put("rfloor", new Character('\u230b')); // right floor, U+230B ISOamsc
        map.put("lang", new Character('\u2329')); // left-pointing angle bracket = bra, U+2329 ISOtech
        // lang is NOT the same character as U+003C 'less than' 
        // or U+2039 'single left-pointing angle quotation mark'
        map.put("rang", new Character('\u232a')); // right-pointing angle bracket = ket, U+232A ISOtech
        // rang is NOT the same character as U+003E 'greater than' 
        // or U+203A 'single right-pointing angle quotation mark'
        // Geometric Shapes
        map.put("loz", new Character('\u25ca')); // lozenge, U+25CA ISOpub
        // Miscellaneous Symbols
        map.put("spades", new Character('\u2660')); // black spade suit, U+2660 ISOpub
        // black here seems to mean filled as opposed to hollow
        map.put("clubs", new Character('\u2663')); // black club suit = shamrock, U+2663 ISOpub
        map.put("hearts", new Character('\u2665')); // black heart suit = valentine, U+2665 ISOpub
        map.put("diams", new Character('\u2666')); // black diamond suit, U+2666 ISOpub
        // C0 Controls and Basic Latin
        map.put("quot", new Character('\u0022')); // quotation mark = APL quote, U+0022 ISOnum
        map.put("amp", new Character('\u0026')); // ampersand, U+0026 ISOnum
        map.put("apos", new Character('\''));
        map.put("lt", new Character('\u003c')); // less-than sign, U+003C ISOnum
        map.put("gt", new Character('\u003e')); // greater-than sign, U+003E ISOnum
        // Latin Extended-A
        map.put("OElig", new Character('\u0152')); // latin capital ligature OE, U+0152 ISOlat2
        map.put("oelig", new Character('\u0153')); // latin small ligature oe, U+0153 ISOlat2
        // ligature is a misnomer, this is a separate character in some languages
        map.put("Scaron", new Character('\u0160')); // latin capital letter S with caron, U+0160 ISOlat2
        map.put("scaron", new Character('\u0161')); // latin small letter s with caron, U+0161 ISOlat2
        map.put("Yuml", new Character('\u0178')); // latin capital letter Y with diaeresis, U+0178 ISOlat2
        // Spacing Modifier Letters
        map.put("circ", new Character('\u02c6')); // modifier letter circumflex accent, U+02C6 ISOpub
        map.put("tilde", new Character('\u02dc')); // small tilde, U+02DC ISOdia
        // General Punctuation
        map.put("ensp", new Character('\u2002')); // en space, U+2002 ISOpub
        map.put("emsp", new Character('\u2003')); // em space, U+2003 ISOpub
        map.put("thinsp", new Character('\u2009')); // thin space, U+2009 ISOpub
        map.put("zwnj", new Character('\u200c')); // zero width non-joiner, U+200C NEW RFC 2070
        map.put("zwj", new Character('\u200d')); // zero width joiner, U+200D NEW RFC 2070
        map.put("lrm", new Character('\u200e')); // left-to-right mark, U+200E NEW RFC 2070
        map.put("rlm", new Character('\u200f')); // right-to-left mark, U+200F NEW RFC 2070
        map.put("ndash", new Character('\u2013')); // en dash, U+2013 ISOpub
        map.put("mdash", new Character('\u2014')); // em dash, U+2014 ISOpub
        map.put("lsquo", new Character('\u2018')); // left single quotation mark, U+2018 ISOnum
        map.put("rsquo", new Character('\u2019')); // right single quotation mark, U+2019 ISOnum
        map.put("sbquo", new Character('\u201a')); // single low-9 quotation mark, U+201A NEW
        map.put("ldquo", new Character('\u201c')); // left double quotation mark, U+201C ISOnum
        map.put("rdquo", new Character('\u201d')); // right double quotation mark, U+201D ISOnum
        map.put("bdquo", new Character('\u201e')); // double low-9 quotation mark, U+201E NEW
        map.put("dagger", new Character('\u2020')); // dagger, U+2020 ISOpub
        map.put("Dagger", new Character('\u2021')); // double dagger, U+2021 ISOpub
        map.put("permil", new Character('\u2030')); // per mille sign, U+2030 ISOtech
        map.put("lsaquo", new Character('\u2039')); // single left-pointing angle quotation mark, U+2039 ISO proposed
        // lsaquo is proposed but not yet ISO standardized
        map.put("rsaquo", new Character('\u203a')); // single right-pointing angle quotation mark, U+203A ISO proposed
        // rsaquo is proposed but not yet ISO standardized
        map.put("euro", new Character('\u20ac')); // euro sign, U+20AC NEW
    }
    

    /**
     * Translates an entity to a unicode character.
     * <p>
     * Three forms are recognised: hexadecimal numeric references
     * (<code>#x41</code> or <code>#X41</code>), decimal numeric references
     * (<code>#65</code>) and named entities that are looked up in the entity
     * map (<code>amp</code>, <code>euro</code>, ...).
     *
     * @param	name	the name of the entity, without the leading ampersand
     *              	and without the trailing semicolon
     * @return	the corresponding unicode character, or <code>'\0'</code> when
     *        	the name is <code>null</code>, unknown, not a valid number, or
     *        	denotes a value that does not fit into a single <code>char</code>
     */
    public static char decodeEntity(String name) {
        if (name == null) {
            return '\0';
        }
        if (name.startsWith("#")) {
            boolean hex = name.startsWith("#x") || name.startsWith("#X");
            try {
                int code = hex
                        ? Integer.parseInt(name.substring(2), 16)
                        : Integer.parseInt(name.substring(1));
                // A bare (char) cast would silently wrap negative values and
                // code points above U+FFFF into an unrelated character, so
                // such references are reported as "not decodable" instead.
                if (code < 0 || code > 0xFFFF) {
                    return '\0';
                }
                return (char) code;
            }
            catch (NumberFormatException nfe) {
                // Malformed number (including an empty one): treat as unknown.
                return '\0';
            }
        }
        Character c = (Character) map.get(name);
        return c == null ? '\0' : c.charValue();
    }
    
    /**
     * Translates a String with entities (&...;) to a String without entities,
     * replacing the entity with the right (unicode) character.
     * <p>
     * The text is processed in a single pass, so characters produced by a
     * replacement are never interpreted as the start of another entity.
     * An ampersand that is followed by another ampersand before the next
     * semicolon is copied literally; only the last ampersand in front of a
     * semicolon starts an entity. Entities for which
     * {@link #decodeEntity(String)} returns <code>'\0'</code> are copied to
     * the result unchanged.
     *
     * @param	s	the text that may contain entities
     * @return	the text with all decodable entities replaced; the argument
     *        	itself when it is <code>null</code> or contains no ampersand
     */
    public static String decodeString(String s) {
        if (s == null) {
            return null;
        }
        int ampersand = s.indexOf('&');
        if (ampersand == -1) {
            return s;
        }

        StringBuffer buf = new StringBuffer(s.length());
        buf.append(s.substring(0, ampersand));
        while (true) {
            int semicolon = s.indexOf(';', ampersand);
            if (semicolon == -1) {
                // No terminator left: the remainder cannot contain an entity.
                buf.append(s.substring(ampersand));
                return buf.toString();
            }

            // The entity starts at the last ampersand in front of the
            // semicolon; everything from the current ampersand up to that
            // point is ordinary text. The character at 'semicolon' is ';',
            // so the backwards search always ends at an index >= ampersand.
            int entityStart = s.lastIndexOf('&', semicolon);
            buf.append(s.substring(ampersand, entityStart));

            char replacement = decodeEntity(s.substring(entityStart + 1, semicolon));
            if (replacement == '\0') {
                // Unknown entity: keep the original "&name;" text.
                buf.append(s.substring(entityStart, semicolon + 1));
            }
            else {
                buf.append(replacement);
            }

            ampersand = s.indexOf('&', semicolon + 1);
            if (ampersand == -1) {
                buf.append(s.substring(semicolon + 1));
                return buf.toString();
            }
            buf.append(s.substring(semicolon + 1, ampersand));
        }
    }
}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -