📄 translate.java

📁 html 解析处理代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
                        if (null == key)                            key = new CharacterReferenceEx ();                        key.setKernel (string);                        key.setStart (index);                        key.setEnd (semi);                        item = lookup (key);                        if (null != item)                        {                            buffer.append ((char)item.getCharacter ());                            index += item.getKernel ().length ();                            if ((index < length) && (';' == string.charAt (index)))                                index++;                            amp = index;                        }                    }                    else                    {                        // need do nothing here, the ampersand will be consumed below                    }                }                // gather up unconsumed characters                while (amp < index)                    buffer.append (string.charAt (amp++));            }            while ((index < length) && (-1 != (amp = string.indexOf ('&', index))));            // equivalent to buffer.append (string.substring (index));            // but without the allocation of a new String            while (index < length)                buffer.append (string.charAt (index++));            ret = buffer.toString ();        }        return (ret);    }    /**     * Decode the characters in a string buffer containing references.     * Change all numeric character reference and character entity references     * to unicode characters.     * @param buffer The StringBuffer containing references.     * @return The decoded string.     */    public static String decode (StringBuffer buffer)    {        return decode (buffer.toString());    }    /**     * Decode a stream containing references.     * Change all numeric character reference and character entity references     * to unicode characters. If <code>DECODE_LINE_BY_LINE</code> is true,     * the input stream is broken up into lines, terminated by either     * carriage return or newline, in order to reduce the latency and maximum     * buffering memory size required.     * @param in The stream to translate. It is assumed that the input     * stream is encoded with ISO-8859-1 since the table of character     * entity references in this class applies only to ISO-8859-1.     * @param out The stream to write the decoded stream to.     */    public static void decode (InputStream in, PrintStream out)    {        Reader reader;        StringBuffer buffer;        int character;        String string;        boolean newlines;        try        {            try            {                reader = new BufferedReader (new InputStreamReader (in, "ISO-8859-1"));            }            catch (UnsupportedEncodingException use)            {                // yeah, like this will happen; OK, assume the default is ISO-8859-1                reader = new BufferedReader (new InputStreamReader (in));            }            buffer = new StringBuffer (1024);            newlines = false;            if (DECODE_LINE_BY_LINE)                while (-1 != (character = reader.read ()))                {                    if (('\r' == character) || ('\n' == character))                    {                        if (!newlines)                        {                            string = decode (buffer.toString ());                            out.print (string);                            buffer.setLength (0);                            newlines = true;                        }                        buffer.append ((char)character);                    }                    else                    {                        if (newlines)                        {                            out.print (buffer.toString ());                            buffer.setLength (0);                            newlines = false;                        }                        buffer.append ((char)character);                    }                }            else                while (-1 != (character = reader.read ()))                    buffer.append ((char)character);            if (0 != buffer.length ())            {                if (newlines)                    out.print (buffer.toString ());                else                {                    string = decode (buffer.toString ());                    out.print (string);                }            }        }        catch (IOException ioe)        {            out.println ();            out.println (ioe.getMessage ());        }        finally        {            out.flush ();        }    }    /**     * Convert a character to a numeric character reference.     * Convert a unicode character to a numeric character reference of     * the form &amp;#xxxx;.     * @param character The character to convert.     * @return The converted character.     */    public static String encode (int character)    {        StringBuffer ret;        ret = new StringBuffer (13); /* &#2147483647; */        ret.append ("&#");        if (ENCODE_HEXADECIMAL)        {            ret.append ("x");            ret.append (Integer.toHexString (character));        }        else            ret.append (character);        ret.append (';');        return (ret.toString ());    }        /**     * Encode a string to use references.     * Change all characters that are not ISO-8859-1 to their numeric character     * reference or character entity reference.     * @param string The string to translate.     * @return The encoded string.     */    public static String encode (String string)    {        int length;        char c;        CharacterReference candidate;        StringBuffer ret;        ret = new StringBuffer (string.length () * 6);        length  = string.length ();        for (int i = 0; i < length; i++)        {            c = string.charAt (i);            candidate = lookup (c);            if (null != candidate)            {                ret.append ('&');                ret.append (candidate.getKernel ());                ret.append (';');            }            else if (!(c < 0x007F))            {                ret.append ("&#");                if (ENCODE_HEXADECIMAL)                {                    ret.append ("x");                    ret.append (Integer.toHexString (c));                }                else                    ret.append ((int)c);                ret.append (';');            }            else                ret.append (c);        }        return (ret.toString ());    }    /**     * Encode a stream to use references.     * Change all characters that are not ISO-8859-1 to their numeric character     * reference or character entity reference.     * @param in The stream to translate. It is assumed that the input     * stream is encoded with ISO-8859-1 since the table of character     * entity references in this class applies only to ISO-8859-1.     * @param out The stream to write the decoded stream to.     */    public static void encode (InputStream in, PrintStream out)    {        Reader reader;        char c;        int index;        CharacterReference candidate;        PrintWriter output;        try        {            reader = new BufferedReader (new InputStreamReader (in, "ISO-8859-1"));            output = new PrintWriter (new BufferedWriter (new OutputStreamWriter (out, "ISO-8859-1")));        }        catch (UnsupportedEncodingException use)        {            // yeah, like this will happen; OK, assume default is ISO-8859-1            reader = new BufferedReader (new InputStreamReader (in));            output = new PrintWriter (new BufferedWriter (new OutputStreamWriter (out)));        }        try        {            while (-1 != (index = reader.read ()))            {                c = (char)index;                candidate = lookup (c);                if (null != candidate)                {                    output.print ('&');                    output.print (candidate.getKernel ());                    output.print (';');                }                else if (!(c < 0x007F))                {                    output.print ("&#");                    if (ENCODE_HEXADECIMAL)                    {                        output.print ("x");                        output.print (Integer.toHexString (c));                    }                    else                        output.print ((int)c);                    output.print (';');                }                else                    output.print (c);            }        }        catch (IOException ioe)        {            output.println ();            output.println (ioe.getMessage ());        }        finally        {            output.flush ();        }    }    /**     * Numeric character reference and character entity reference to unicode codec.     * Translate the <code>System.in</code> input into an encoded or decoded     * stream and send the results to <code>System.out</code>.     * @param args If arg[0] is <code>-encode</code> perform an encoding on     * <code>System.in</code>, otherwise perform a decoding.     */    public static void main (String[] args)    {        boolean encode;        if (0 < args.length && args[0].equalsIgnoreCase ("-encode"))            encode = true;        else            encode = false;        if (encode)            encode (System.in, System.out);        else            decode (System.in, System.out);    }}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -