tohtmlstream.java
来自「Mobile 应用程序使用 Java Micro Edition (Java M」· Java 代码 · 共 1,824 行 · 第 1/5 页
JAVA
1,824 行
* with <CODE>%HH</CODE>, where HH is the hex of the byte value. * * @param string String to convert to XML format. * @param doURLEscaping True if we should try to encode as * per http://www.ietf.org/rfc/rfc2396.txt. * * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. */ public void writeAttrURI( final java.io.Writer writer, String string, boolean doURLEscaping) throws IOException { // http://www.ietf.org/rfc/rfc2396.txt says: // A URI is always in an "escaped" form, since escaping or unescaping a // completed URI might change its semantics. Normally, the only time // escape encodings can safely be made is when the URI is being created // from its component parts; each component may have its own set of // characters that are reserved, so only the mechanism responsible for // generating or interpreting that component can determine whether or // not escaping a character will change its semantics. Likewise, a URI // must be separated into its components before the escaped characters // within those components can be safely decoded. // // ...So we do our best to do limited escaping of the URL, without // causing damage. If the URL is already properly escaped, in theory, this // function should not change the string value. final int end = string.length(); if (end > m_attrBuff.length) { m_attrBuff = new char[end*2 + 1]; } string.getChars(0,end, m_attrBuff, 0); final char[] chars = m_attrBuff; int cleanStart = 0; int cleanLength = 0; char ch = 0; for (int i = 0; i < end; i++) { ch = chars[i]; if ((ch < 32) || (ch > 126)) { if (cleanLength > 0) { writer.write(chars, cleanStart, cleanLength); cleanLength = 0; } if (doURLEscaping) { // Encode UTF16 to UTF8. // Reference is Unicode, A Primer, by Tony Graham. // Page 92. // Note that Kay doesn't escape 0x20... // if(ch == 0x20) // Not sure about this... -sb // { // writer.write(ch); // } // else if (ch <= 0x7F) { writer.write('%'); writer.write(makeHHString(ch)); } else if (ch <= 0x7FF) { // Clear low 6 bits before rotate, put high 4 bits in low byte, // and set two high bits. int high = (ch >> 6) | 0xC0; int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit writer.write('%'); writer.write(makeHHString(high)); writer.write('%'); writer.write(makeHHString(low)); } else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate { // I'm sure this can be done in 3 instructions, but I choose // to try and do it exactly like it is done in the book, at least // until we are sure this is totally clean. I don't think performance // is a big issue with this particular function, though I could be // wrong. Also, the stuff below clearly does more masking than // it needs to do. // Clear high 6 bits. int highSurrogate = ((int) ch) & 0x03FF; // Middle 4 bits (wwww) + 1 // "Note that the value of wwww from the high surrogate bit pattern // is incremented to make the uuuuu bit pattern in the scalar value // so the surrogate pair don't address the BMP." int wwww = ((highSurrogate & 0x03C0) >> 6); int uuuuu = wwww + 1; // next 4 bits int zzzz = (highSurrogate & 0x003C) >> 2; // low 2 bits int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; // Get low surrogate character. ch = chars[++i]; // Clear high 6 bits. int lowSurrogate = ((int) ch) & 0x03FF; // put the middle 4 bits into the bottom of yyyyyy (byte 3) yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); // bottom 6 bits. int xxxxxx = (lowSurrogate & 0x003F); int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu int byte2 = 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; int byte3 = 0x80 | yyyyyy; int byte4 = 0x80 | xxxxxx; writer.write('%'); writer.write(makeHHString(byte1)); writer.write('%'); writer.write(makeHHString(byte2)); writer.write('%'); writer.write(makeHHString(byte3)); writer.write('%'); writer.write(makeHHString(byte4)); } else { int high = (ch >> 12) | 0xE0; // top 4 bits int middle = ((ch & 0x0FC0) >> 6) | 0x80; // middle 6 bits int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit writer.write('%'); writer.write(makeHHString(high)); writer.write('%'); writer.write(makeHHString(middle)); writer.write('%'); writer.write(makeHHString(low)); } } else if (escapingNotNeeded(ch)) { writer.write(ch); } else { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } // In this character range we have first written out any previously accumulated // "clean" characters, then processed the current more complicated character, // which may have incremented "i". // We now we reset the next possible clean character. cleanStart = i + 1; } // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as // not allowing quotes in the URI proper syntax, nor in the fragment // identifier, we believe that it's OK to double escape quotes. else if (ch == '"') { // If the character is a '%' number number, try to avoid double-escaping. // There is a question if this is legal behavior. // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) // We are no longer escaping '%' if (cleanLength > 0) { writer.write(chars, cleanStart, cleanLength); cleanLength = 0; } // Mike Kay encodes this as ", so he may know something I don't? if (doURLEscaping) writer.write("%22"); else writer.write("""); // we have to escape this, I guess. // We have written out any clean characters, then the escaped '%' and now we // We now we reset the next possible clean character. cleanStart = i + 1; } else { // no processing for this character, just count how // many characters in a row that we have that need no processing cleanLength++; } } // are there any clean characters at the end of the array // that we haven't processed yet? if (cleanLength > 1) { // if the whole string can be written out as-is do so // otherwise write out the clean chars at the end of the // array if (cleanStart == 0) writer.write(string); else writer.write(chars, cleanStart, cleanLength); } else if (cleanLength == 1) { // a little optimization for 1 clean character // (we could have let the previous if(...) handle them all) writer.write(ch); } } /** * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. * * @param string String to convert to XML format. * @param encoding CURRENTLY NOT IMPLEMENTED. * * @throws org.xml.sax.SAXException */ public void writeAttrString( final java.io.Writer writer, String string, String encoding) throws IOException { final int end = string.length(); if (end > m_attrBuff.length) { m_attrBuff = new char[end * 2 + 1]; } string.getChars(0, end, m_attrBuff, 0); final char[] chars = m_attrBuff; int cleanStart = 0; int cleanLength = 0; char ch = 0; for (int i = 0; i < end; i++) { ch = chars[i]; // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); // System.out.println("ch: "+(int)ch); // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) { cleanLength++; } else if ('<' == ch || '>' == ch) { cleanLength++; // no escaping in this case, as specified in 15.2 } else if ( ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) { cleanLength++; // no escaping in this case, as specified in 15.2 } else { if (cleanLength > 0) { writer.write(chars,cleanStart,cleanLength); cleanLength = 0; } int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); if (i != pos) { i = pos - 1; } else { if (Encodings.isHighUTF16Surrogate(ch)) { writeUTF16Surrogate(ch, chars, i, end); i++; // two input characters processed // this increments by one and the for() // loop itself increments by another one. } // The next is kind of a hack to keep from escaping in the case // of Shift_JIS and the like. /* else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) && (ch != 160)) { writer.write(ch); // no escaping in this case } else */ String outputStringForChar = m_charInfo.getOutputStringForChar(ch); if (null != outputStringForChar) { writer.write(outputStringForChar); } else if (escapingNotNeeded(ch)) { writer.write(ch); // no escaping in this case } else { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } } cleanStart = i + 1; } } // end of for() // are there any clean characters at the end of the array // that we haven't processed yet? if (cleanLength > 1) { // if the whole string can be written out as-is do so // otherwise write out the clean chars at the end of the // array if (cleanStart == 0) writer.write(string); else writer.write(chars, cleanStart, cleanLength); } else if (cleanLength == 1) { // a little optimization for 1 clean character // (we could have let the previous if(...) handle them all) writer.write(ch); } } /** * Receive notification of character data. * * <p>The Parser will call this method to report each chunk of * character data. SAX parsers may return all contiguous character * data in a single chunk, or they may split it into several * chunks; however, all of the characters in any single event * must come from the same external entity, so that the Locator * provides useful information.</p> * * <p>The application must not attempt to read from the array * outside of the specified range.</p> * * <p>Note that some parsers will report whitespace using the * ignorableWhitespace() method rather than this one (validating * parsers must do so).</p>
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?