tohtmlstream.java

来自「Mobile 应用程序使用 Java Micro Edition (Java M」· Java 代码 · 共 1,824 行 · 第 1/5 页

JAVA
1,824
字号
     * with <CODE>%HH</CODE>, where HH is the hex of the byte value.     *     * @param   string      String to convert to XML format.     * @param doURLEscaping True if we should try to encode as      *                      per http://www.ietf.org/rfc/rfc2396.txt.     *     * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.     */    public void writeAttrURI(        final java.io.Writer writer, String string, boolean doURLEscaping)        throws IOException    {        // http://www.ietf.org/rfc/rfc2396.txt says:        // A URI is always in an "escaped" form, since escaping or unescaping a        // completed URI might change its semantics.  Normally, the only time        // escape encodings can safely be made is when the URI is being created        // from its component parts; each component may have its own set of        // characters that are reserved, so only the mechanism responsible for        // generating or interpreting that component can determine whether or        // not escaping a character will change its semantics. Likewise, a URI        // must be separated into its components before the escaped characters        // within those components can be safely decoded.        //        // ...So we do our best to do limited escaping of the URL, without         // causing damage.  If the URL is already properly escaped, in theory, this         // function should not change the string value.        final int end = string.length();        if (end > m_attrBuff.length)        {           m_attrBuff = new char[end*2 + 1];                       }        string.getChars(0,end, m_attrBuff, 0);         final char[] chars = m_attrBuff;        int cleanStart = 0;        int cleanLength = 0;                        char ch = 0;        for (int i = 0; i < end; i++)        {            ch = chars[i];            if ((ch < 32) || (ch > 126))            {                if (cleanLength > 0)                {                    writer.write(chars, cleanStart, cleanLength);                    cleanLength = 0;                }                if (doURLEscaping)                {                    // Encode UTF16 to UTF8.                    // Reference is Unicode, A Primer, by Tony Graham.                    // Page 92.                    // Note that Kay doesn't escape 0x20...                    //  if(ch == 0x20) // Not sure about this... -sb                    //  {                    //    writer.write(ch);                    //  }                    //  else                     if (ch <= 0x7F)                    {                        writer.write('%');                        writer.write(makeHHString(ch));                    }                    else if (ch <= 0x7FF)                    {                        // Clear low 6 bits before rotate, put high 4 bits in low byte,                         // and set two high bits.                        int high = (ch >> 6) | 0xC0;                        int low = (ch & 0x3F) | 0x80;                        // First 6 bits, + high bit                        writer.write('%');                        writer.write(makeHHString(high));                        writer.write('%');                        writer.write(makeHHString(low));                    }                    else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate                    {                        // I'm sure this can be done in 3 instructions, but I choose                         // to try and do it exactly like it is done in the book, at least                         // until we are sure this is totally clean.  I don't think performance                         // is a big issue with this particular function, though I could be                         // wrong.  Also, the stuff below clearly does more masking than                         // it needs to do.                        // Clear high 6 bits.                        int highSurrogate = ((int) ch) & 0x03FF;                        // Middle 4 bits (wwww) + 1                        // "Note that the value of wwww from the high surrogate bit pattern                        // is incremented to make the uuuuu bit pattern in the scalar value                         // so the surrogate pair don't address the BMP."                        int wwww = ((highSurrogate & 0x03C0) >> 6);                        int uuuuu = wwww + 1;                        // next 4 bits                        int zzzz = (highSurrogate & 0x003C) >> 2;                        // low 2 bits                        int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;                        // Get low surrogate character.                        ch = chars[++i];                        // Clear high 6 bits.                        int lowSurrogate = ((int) ch) & 0x03FF;                        // put the middle 4 bits into the bottom of yyyyyy (byte 3)                        yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);                        // bottom 6 bits.                        int xxxxxx = (lowSurrogate & 0x003F);                        int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu                        int byte2 =                            0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;                        int byte3 = 0x80 | yyyyyy;                        int byte4 = 0x80 | xxxxxx;                        writer.write('%');                        writer.write(makeHHString(byte1));                        writer.write('%');                        writer.write(makeHHString(byte2));                        writer.write('%');                        writer.write(makeHHString(byte3));                        writer.write('%');                        writer.write(makeHHString(byte4));                    }                    else                    {                        int high = (ch >> 12) | 0xE0; // top 4 bits                        int middle = ((ch & 0x0FC0) >> 6) | 0x80;                        // middle 6 bits                        int low = (ch & 0x3F) | 0x80;                        // First 6 bits, + high bit                        writer.write('%');                        writer.write(makeHHString(high));                        writer.write('%');                        writer.write(makeHHString(middle));                        writer.write('%');                        writer.write(makeHHString(low));                    }                }                else if (escapingNotNeeded(ch))                {                    writer.write(ch);                }                else                {                    writer.write("&#");                    writer.write(Integer.toString(ch));                    writer.write(';');                }                // In this character range we have first written out any previously accumulated                 // "clean" characters, then processed the current more complicated character,                // which may have incremented "i".                // We now we reset the next possible clean character.                cleanStart = i + 1;            }            // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as            // not allowing quotes in the URI proper syntax, nor in the fragment             // identifier, we believe that it's OK to double escape quotes.            else if (ch == '"')            {                // If the character is a '%' number number, try to avoid double-escaping.                // There is a question if this is legal behavior.                // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded                // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.                //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )                // We are no longer escaping '%'                if (cleanLength > 0)                {                    writer.write(chars, cleanStart, cleanLength);                    cleanLength = 0;                }                                                   // Mike Kay encodes this as &#34;, so he may know something I don't?                if (doURLEscaping)                    writer.write("%22");                else                    writer.write("&quot;"); // we have to escape this, I guess.                // We have written out any clean characters, then the escaped '%' and now we                // We now we reset the next possible clean character.                cleanStart = i + 1;                }            else            {                // no processing for this character, just count how                // many characters in a row that we have that need no processing                cleanLength++;            }        }                // are there any clean characters at the end of the array        // that we haven't processed yet?        if (cleanLength > 1)        {            // if the whole string can be written out as-is do so            // otherwise write out the clean chars at the end of the            // array            if (cleanStart == 0)                writer.write(string);            else                writer.write(chars, cleanStart, cleanLength);        }        else if (cleanLength == 1)        {            // a little optimization for 1 clean character            // (we could have let the previous if(...) handle them all)            writer.write(ch);        }    }    /**     * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,     * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.     *     * @param   string      String to convert to XML format.     * @param   encoding    CURRENTLY NOT IMPLEMENTED.     *     * @throws org.xml.sax.SAXException     */    public void writeAttrString(        final java.io.Writer writer, String string, String encoding)        throws IOException    {        final int end = string.length();        if (end > m_attrBuff.length)        {            m_attrBuff = new char[end * 2 + 1];        }        string.getChars(0, end, m_attrBuff, 0);        final char[] chars = m_attrBuff;                int cleanStart = 0;        int cleanLength = 0;        char ch = 0;        for (int i = 0; i < end; i++)        {            ch = chars[i];            // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);            // System.out.println("ch: "+(int)ch);            // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);            // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))            {                cleanLength++;            }            else if ('<' == ch || '>' == ch)            {                cleanLength++; // no escaping in this case, as specified in 15.2            }            else if (                ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))            {                cleanLength++; // no escaping in this case, as specified in 15.2            }            else            {                if (cleanLength > 0)                {                    writer.write(chars,cleanStart,cleanLength);                    cleanLength = 0;                }                int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);                if (i != pos)                {                    i = pos - 1;                }                else                {                    if (Encodings.isHighUTF16Surrogate(ch))                    {                             writeUTF16Surrogate(ch, chars, i, end);                            i++; // two input characters processed                                 // this increments by one and the for()                                 // loop itself increments by another one.                    }                    // The next is kind of a hack to keep from escaping in the case                     // of Shift_JIS and the like.                    /*                    else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)                    && (ch != 160))                    {                    writer.write(ch);  // no escaping in this case                    }                    else                    */                    String outputStringForChar = m_charInfo.getOutputStringForChar(ch);                    if (null != outputStringForChar)                    {                        writer.write(outputStringForChar);                    }                    else if (escapingNotNeeded(ch))                    {                        writer.write(ch); // no escaping in this case                    }                    else                    {                        writer.write("&#");                        writer.write(Integer.toString(ch));                        writer.write(';');                    }                }                cleanStart = i + 1;            }        } // end of for()                // are there any clean characters at the end of the array        // that we haven't processed yet?        if (cleanLength > 1)        {            // if the whole string can be written out as-is do so            // otherwise write out the clean chars at the end of the            // array            if (cleanStart == 0)                writer.write(string);            else                writer.write(chars, cleanStart, cleanLength);        }        else if (cleanLength == 1)        {            // a little optimization for 1 clean character            // (we could have let the previous if(...) handle them all)            writer.write(ch);        }    }    /**     * Receive notification of character data.     *     * <p>The Parser will call this method to report each chunk of     * character data.  SAX parsers may return all contiguous character     * data in a single chunk, or they may split it into several     * chunks; however, all of the characters in any single event     * must come from the same external entity, so that the Locator     * provides useful information.</p>     *     * <p>The application must not attempt to read from the array     * outside of the specified range.</p>     *     * <p>Note that some parsers will report whitespace using the     * ignorableWhitespace() method rather than this one (validating     * parsers must do so).</p>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?