⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlconverter.java

📁 非常棒的java数据库
💻 JAVA
字号:
/*
 * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
 * (license2)
 * Initial Developer: H2 Group
 */
package org.h2.tools.indexer;

import java.util.HashMap;

/**
 * This class replaces HTML entities in text (for example &amp;uuml;) with the
 * corresponding character and vice versa.
 * <p>
 * Named entities are looked up in a fixed table built once when the class is
 * loaded. Numeric character references are supported in decimal
 * ({@code &#252;}) and hexadecimal ({@code &#xfc;}) notation, limited to the
 * range 0 to 0xffff (a single Java char).
 */
public class HtmlConverter {
    /** Maps an entity name (String, for example "amp") to its Character. */
    private static final HashMap charMap = new HashMap();

    /** Maps a Character to its entity name (String); inverse of charMap. */
    private static final HashMap codeMap = new HashMap();

    /**
     * The entity table. Each element has the form "name:code", where code is
     * the decimal character code of the entity.
     */
    private static final String[] CHARS = { "quot:34", "amp:38", "lt:60", "gt:62", "nbsp:160", "iexcl:161", "cent:162",
            "pound:163", "curren:164", "yen:165", "brvbar:166", "sect:167", "uml:168", "copy:169", "ordf:170",
            "laquo:171", "not:172", "shy:173", "reg:174", "macr:175", "deg:176", "plusmn:177", "sup2:178", "sup3:179",
            "acute:180", "micro:181", "para:182", "middot:183", "cedil:184", "sup1:185", "ordm:186", "raquo:187",
            "frac14:188", "frac12:189", "frac34:190", "iquest:191", "Agrave:192", "Aacute:193", "Acirc:194",
            "Atilde:195", "Auml:196", "Aring:197", "AElig:198", "Ccedil:199", "Egrave:200", "Eacute:201", "Ecirc:202",
            "Euml:203", "Igrave:204", "Iacute:205", "Icirc:206", "Iuml:207", "ETH:208", "Ntilde:209", "Ograve:210",
            "Oacute:211", "Ocirc:212", "Otilde:213", "Ouml:214", "times:215", "Oslash:216", "Ugrave:217", "Uacute:218",
            "Ucirc:219", "Uuml:220", "Yacute:221", "THORN:222", "szlig:223", "agrave:224", "aacute:225", "acirc:226",
            "atilde:227", "auml:228", "aring:229", "aelig:230", "ccedil:231", "egrave:232", "eacute:233", "ecirc:234",
            "euml:235", "igrave:236", "iacute:237", "icirc:238", "iuml:239", "eth:240", "ntilde:241", "ograve:242",
            "oacute:243", "ocirc:244", "otilde:245", "ouml:246", "divide:247", "oslash:248", "ugrave:249",
            "uacute:250", "ucirc:251", "uuml:252", "yacute:253", "thorn:254", "yuml:255", "Alpha:913", "alpha:945",
            "Beta:914", "beta:946", "Gamma:915", "gamma:947", "Delta:916", "delta:948", "Epsilon:917", "epsilon:949",
            "Zeta:918", "zeta:950", "Eta:919", "eta:951", "Theta:920", "theta:952", "Iota:921", "iota:953",
            "Kappa:922", "kappa:954", "Lambda:923", "lambda:955", "Mu:924", "mu:956", "Nu:925", "nu:957", "Xi:926",
            "xi:958", "Omicron:927", "omicron:959", "Pi:928", "pi:960", "Rho:929", "rho:961", "Sigma:931",
            "sigmaf:962", "sigma:963", "Tau:932", "tau:964", "Upsilon:933", "upsilon:965", "Phi:934", "phi:966",
            "Chi:935", "chi:967", "Psi:936", "psi:968", "Omega:937", "omega:969", "thetasym:977", "upsih:978",
            "piv:982", "forall:8704", "part:8706", "exist:8707", "empty:8709", "nabla:8711", "isin:8712", "notin:8713",
            "ni:8715", "prod:8719", "sum:8721", "minus:8722", "lowast:8727", "radic:8730", "prop:8733", "infin:8734",
            "ang:8736", "and:8743", "or:8744", "cap:8745", "cup:8746", "int:8747", "there4:8756", "sim:8764",
            "cong:8773", "asymp:8776", "ne:8800", "equiv:8801", "le:8804", "ge:8805", "sub:8834", "sup:8835",
            "nsub:8836", "sube:8838", "supe:8839", "oplus:8853", "otimes:8855", "perp:8869", "sdot:8901", "loz:9674",
            "lceil:8968", "rceil:8969", "lfloor:8970", "rfloor:8971", "lang:9001", "rang:9002", "larr:8592",
            "uarr:8593", "rarr:8594", "darr:8595", "harr:8596", "crarr:8629", "lArr:8656", "uArr:8657", "rArr:8658",
            "dArr:8659", "hArr:8660", "bull:8226", "prime:8242", "oline:8254", "frasl:8260", "weierp:8472",
            "image:8465", "real:8476", "trade:8482", "euro:8364", "alefsym:8501", "spades:9824", "clubs:9827",
            "hearts:9829", "diams:9830", "ensp:8194", "emsp:8195", "thinsp:8201", "zwnj:8204", "zwj:8205", "lrm:8206",
            "rlm:8207", "ndash:8211", "mdash:8212", "lsquo:8216", "rsquo:8217", "sbquo:8218", "ldquo:8220",
            "rdquo:8221", "bdquo:8222", "dagger:8224", "Dagger:8225", "hellip:8230", "permil:8240", "lsaquo:8249",
            "rsaquo:8250" };

    // Build both lookup directions from the "name:code" table.
    static {
        for (int i = 0; i < CHARS.length; i++) {
            String token = CHARS[i];
            int idx = token.indexOf(':');
            String key = token.substring(0, idx);
            int ch = Integer.parseInt(token.substring(idx + 1));
            Character character = new Character((char) ch);
            charMap.put(key, character);
            codeMap.put(character, key);
        }
    }

    /**
     * Convert a string to HTML by replacing characters with entities.
     * Characters listed in the entity table become named entities (this
     * includes the quote, ampersand, less-than and greater-than characters),
     * other characters below 128 are copied unchanged, and all remaining
     * characters become decimal numeric references such as {@code &#256;}.
     *
     * @param s the text to convert (may be null)
     * @return the HTML text, or null if s is null
     */
    public static String convertStringToHtml(String s) {
        if (s == null) {
            return null;
        }
        if (s.length() == 0) {
            return s;
        }
        StringBuffer buff = new StringBuffer();
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            String token = (String) codeMap.get(new Character(ch));
            if (token != null) {
                buff.append('&');
                buff.append(token);
                buff.append(';');
            } else if (ch < 128) {
                buff.append(ch);
            } else {
                buff.append('&');
                buff.append('#');
                buff.append((int) ch);
                buff.append(';');
            }
        }
        return buff.toString();
    }

    /**
     * Convert HTML text to a string by replacing entities with the
     * characters they stand for. Named entities from the entity table and
     * numeric references (decimal, or hexadecimal with an "x" or "X" prefix)
     * in the range 0 to 0xffff are decoded.
     * <p>
     * Error handling: an ampersand with no following semicolon is replaced by
     * "???". An entity that cannot be decoded is reported as "???" + key +
     * "???"; scanning then resumes directly after the ampersand, so the text
     * of the entity (including the semicolon) also appears after the marker.
     *
     * @param html the HTML text (may be null)
     * @return the decoded text, or null if html is null
     */
    public static String convertHtmlToString(String html) {
        if (html == null) {
            return null;
        }
        if (html.length() == 0) {
            return html;
        }
        if (html.indexOf('&') < 0) {
            // nothing to decode
            return html;
        }
        StringBuffer buff = new StringBuffer();
        for (int i = 0; i < html.length(); i++) {
            char ch = html.charAt(i);
            if (ch != '&') {
                buff.append(ch);
                continue;
            }
            int idx = html.indexOf(';', i + 1);
            if (idx < 0) {
                // unterminated entity: mark it and keep copying the rest
                buff.append("???");
                continue;
            }
            String key = html.substring(i + 1, idx);
            Character repl;
            if (key.startsWith("#")) {
                repl = decodeNumericReference(key.substring(1));
            } else {
                repl = (Character) charMap.get(key);
            }
            if (repl == null) {
                // NOTE(review): the position is intentionally not advanced to
                // idx here, so the entity text is copied again after the
                // marker. This looks deliberate (no input text is lost when a
                // plain '&' is followed by an unrelated ';') - confirm before
                // changing.
                buff.append("???" + key + "???");
                continue;
            }
            buff.append(repl.charValue());
            // skip the entity, including the terminating ';'
            i = idx;
        }
        return buff.toString();
    }

    /**
     * Decode the part of a numeric character reference that follows the '#'.
     * A leading "x" or "X" selects hexadecimal notation, otherwise the value
     * is read as decimal.
     *
     * @param reference the text between '#' and ';'
     * @return the character, or null if the text is not a number or the code
     *         is outside the range 0 to 0xffff
     */
    private static Character decodeNumericReference(String reference) {
        try {
            int code;
            if (reference.startsWith("x") || reference.startsWith("X")) {
                code = Integer.parseInt(reference.substring(1), 16);
            } else {
                code = Integer.parseInt(reference);
            }
            if (code < 0 || code > 0xffff) {
                return null;
            }
            return new Character((char) code);
        } catch (NumberFormatException e) {
            // not a number: the caller reports it as an unknown entity
            return null;
        }
    }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -