📄 xmlwriter.java
字号:
// (Tail of a method whose beginning lies before this excerpt.)
// Writes the whole of buf as a double-quoted value, escaping it
// according to the context given by "code".
    int off = 0, len = buf.length;

    // we can't add line breaks to attribute/entity/... values,
    // so wrapping is switched off while the quoted value is written
    noWrap = true;
    rawWrite ('"');
    escapeChars (buf, off, len, code);
    rawWrite ('"');
    noWrap = false;
}

// From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
// (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
// Codes 128-159 have no assigned values.
//
// The table is indexed by (character code - 160); the comment
// before each row gives the character code of the row's first entry.
private static final String HTMLlat1x [] = {
    // 160
    "nbsp", "iexcl", "cent", "pound", "curren",
    "yen", "brvbar", "sect", "uml", "copy",

    // 170
    "ordf", "laquo", "not", "shy", "reg",
    "macr", "deg", "plusmn", "sup2", "sup3",

    // 180
    "acute", "micro", "para", "middot", "cedil",
    "sup1", "ordm", "raquo", "frac14", "frac12",

    // 190
    "frac34", "iquest", "Agrave", "Aacute", "Acirc",
    "Atilde", "Auml", "Aring", "AElig", "Ccedil",

    // 200
    "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
    "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",

    // 210
    "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
    "times", "Oslash", "Ugrave", "Uacute", "Ucirc",

    // 220
    "Uuml", "Yacute", "THORN", "szlig", "agrave",
    "aacute", "acirc", "atilde", "auml", "aring",

    // 230
    "aelig", "ccedil", "egrave", "eacute", "ecirc",
    "euml", "igrave", "iacute", "icirc", "iuml",

    // 240
    "eth", "ntilde", "ograve", "oacute", "ocirc",
    "otilde", "ouml", "divide", "oslash", "ugrave",

    // 250
    "uacute", "ucirc", "uuml", "yacute", "thorn",
    "yuml"
};

// From "HTMLsymbolx.ent" ... some of the symbols that
// we can conveniently handle.  Entities for the Greek
// alphabet (upper and lower cases) are compact.
private static final String HTMLsymbolx_GR [] = { // 913 "Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta", "Eta", "Theta", "Iota", "Kappa", // 923 "Lambda", "Mu", "Nu", "Xi", "Omicron", "Pi", "Rho", null, "Sigma", "Tau", // 933 "Upsilon", "Phi", "Chi", "Psi", "Omega" }; private static final String HTMLsymbolx_gr [] = { // 945 "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", "iota", "kappa", // 955 "lambda", "mu", "nu", "xi", "omicron", "pi", "rho", "sigmaf", "sigma", "tau", // 965 "upsilon", "phi", "chi", "psi", "omega" }; // General routine to write text and substitute predefined // entities (XML, and a special case for XHTML) as needed. private void escapeChars (char buf [], int off, int len, int code) throws SAXException, IOException { int first = 0; if (off < 0) { off = 0; len = buf.length; } for (int i = 0; i < len; i++) { String esc; char c = buf [off + i]; switch (c) { // Note that CTX_ATTRIBUTE isn't explicitly tested here; // all syntax delimiters are escaped in CTX_ATTRIBUTE, // otherwise it's similar to CTX_CONTENT // ampersand flags entity references; entity replacement // text has unexpanded references, other text doesn't. 
case '&': if (code == CTX_ENTITY || code == CTX_UNPARSED) continue; esc = "amp"; break; // attributes and text may NOT have literal '<', but // entities may have markup constructs case '<': if (code == CTX_ENTITY || code == CTX_UNPARSED) continue; esc = "lt"; break; // as above re markup constructs; but otherwise // except when canonicalizing, this is for consistency case '>': if (code == CTX_ENTITY || code == CTX_UNPARSED) continue; esc = "gt"; break; case '\'': if (code == CTX_CONTENT || code == CTX_UNPARSED) continue; if (canonical) continue; esc = "apos"; break; // needed when printing quoted attribute/entity values case '"': if (code == CTX_CONTENT || code == CTX_UNPARSED) continue; esc = "quot"; break; // make line ends work per host OS convention case '\n': esc = eol; break; // // No other characters NEED special treatment ... except // for encoding-specific issues, like whether the character // can really be represented in that encoding. // default: // // There are characters we can never write safely; getting // them is an error. // // (a) They're never legal in XML ... detected by range // checks, and (eventually) by remerging surrogate // pairs on output. (Easy error for apps to prevent.) // // (b) This encoding can't represent them, and we // can't make reference substitution (e.g. inside // CDATA sections, names, PI data, etc). (Hard for // apps to prevent, except by using UTF-8 or UTF-16 // as their output encoding.) // // We know a very little bit about what characters // the US-ASCII and ISO-8859-1 encodings support. For // other encodings we can't detect the second type of // error at all. (Never an issue for UTF-8 or UTF-16.) 
//// FIXME: CR in CDATA is an error; in text, turn to a char ref// FIXME: CR/LF/TAB in attributes should become char refs if ((c > 0xfffd) || ((c < 0x0020) && !((c == 0x0009) || (c == 0x000A) || (c == 0x000D))) || (((c & dangerMask) != 0) && (code == CTX_UNPARSED))) { // if case (b) in CDATA, we might end the section, // write a reference, then restart ... possible // in one DOM L3 draft. throw new CharConversionException ( "Illegal or non-writable character: U+" + Integer.toHexString (c)); } // // If the output encoding represents the character // directly, let it do so! Else we'll escape it. // if ((c & dangerMask) == 0) continue; esc = null; // Avoid numeric refs where symbolic ones exist, as // symbolic ones make more sense to humans reading! if (xhtml) { // all the HTMLlat1x.ent entities // (all the "ISO-8859-1" characters) if (c >= 160 && c <= 255) esc = HTMLlat1x [c - 160]; // not quite half the HTMLsymbolx.ent entities else if (c >= 913 && c <= 937) esc = HTMLsymbolx_GR [c - 913]; else if (c >= 945 && c <= 969) esc = HTMLsymbolx_gr [c - 945]; else switch (c) { // all of the HTMLspecialx.ent entities case 338: esc = "OElig"; break; case 339: esc = "oelig"; break; case 352: esc = "Scaron"; break; case 353: esc = "scaron"; break; case 376: esc = "Yuml"; break; case 710: esc = "circ"; break; case 732: esc = "tilde"; break; case 8194: esc = "ensp"; break; case 8195: esc = "emsp"; break; case 8201: esc = "thinsp"; break; case 8204: esc = "zwnj"; break; case 8205: esc = "zwj"; break; case 8206: esc = "lrm"; break; case 8207: esc = "rlm"; break; case 8211: esc = "ndash"; break; case 8212: esc = "mdash"; break; case 8216: esc = "lsquo"; break; case 8217: esc = "rsquo"; break; case 8218: esc = "sbquo"; break; case 8220: esc = "ldquo"; break; case 8221: esc = "rdquo"; break; case 8222: esc = "bdquo"; break; case 8224: esc = "dagger"; break; case 8225: esc = "Dagger"; break; case 8240: esc = "permil"; break; case 8249: esc = "lsaquo"; break; case 8250: esc = "rsaquo"; 
break; case 8364: esc = "euro"; break; // the other HTMLsymbox.ent entities case 402: esc = "fnof"; break; case 977: esc = "thetasym"; break; case 978: esc = "upsih"; break; case 982: esc = "piv"; break; case 8226: esc = "bull"; break; case 8230: esc = "hellip"; break; case 8242: esc = "prime"; break; case 8243: esc = "Prime"; break; case 8254: esc = "oline"; break; case 8260: esc = "frasl"; break; case 8472: esc = "weierp"; break; case 8465: esc = "image"; break; case 8476: esc = "real"; break; case 8482: esc = "trade"; break; case 8501: esc = "alefsym"; break; case 8592: esc = "larr"; break; case 8593: esc = "uarr"; break; case 8594: esc = "rarr"; break; case 8595: esc = "darr"; break; case 8596: esc = "harr"; break; case 8629: esc = "crarr"; break; case 8656: esc = "lArr"; break; case 8657: esc = "uArr"; break; case 8658: esc = "rArr"; break; case 8659: esc = "dArr"; break; case 8660: esc = "hArr"; break; case 8704: esc = "forall"; break; case 8706: esc = "part"; break; case 8707: esc = "exist"; break; case 8709: esc = "empty"; break; case 8711: esc = "nabla"; break; case 8712: esc = "isin"; break; case 8713: esc = "notin"; break; case 8715: esc = "ni"; break; case 8719: esc = "prod"; break; case 8721: esc = "sum"; break; case 8722: esc = "minus"; break; case 8727: esc = "lowast"; break; case 8730: esc = "radic"; break; case 8733: esc = "prop"; break; case 8734: esc = "infin"; break; case 8736: esc = "ang"; break; case 8743: esc = "and"; break; case 8744: esc = "or"; break; case 8745: esc = "cap"; break; case 8746: esc = "cup"; break; case 8747: esc = "int"; break; case 8756: esc = "there4"; break; case 8764: esc = "sim"; break; case 8773: esc = "cong"; break; case 8776: esc = "asymp"; break; case 8800: esc = "ne"; break; case 8801: esc = "equiv"; break; case 8804: esc = "le"; break; case 8805: esc = "ge"; break; case 8834: esc = "sub"; break; case 8835: esc = "sup"; break; case 8836: esc = "nsub"; break; case 8838: esc = "sube"; break; case 8839: esc = "supe"; 
break; case 8853: esc = "oplus"; break; case 8855: esc = "otimes"; break; case 8869: esc = "perp"; break; case 8901: esc = "sdot"; break; case 8968: esc = "lceil"; break; case 8969: esc = "rceil"; break; case 8970: esc = "lfloor"; break; case 8971: esc = "rfloor"; break; case 9001: esc = "lang"; break; case 9002: esc = "rang"; break; case 9674: esc = "loz"; break; case 9824: esc = "spades"; break; case 9827: esc = "clubs"; break; case 9829: esc = "hearts"; break; case 9830: esc = "diams"; break; } } // else escape with numeric char refs if (esc == null) { stringBuf.setLength (0); stringBuf.append ("#x"); stringBuf.append (Integer.toHexString (c).toUpperCase ()); esc = stringBuf.toString (); // FIXME: We don't write surrogate pairs correctly. // They should work as one ref per character, since // each pair is one character. For reading back into // Unicode, it matters beginning in Unicode 3.1 ... } break; } if (i != first) rawWrite (buf, off + first, i - first); first = i + 1; if (esc == eol) newline (); else { rawWrite ('&'); rawWrite (esc); rawWrite (';'); } } if (first < len) rawWrite (buf, off + first, len - first); } private void newline () throws SAXException, IOException { out.write (eol); column = 0; } private void doIndent () throws SAXException, IOException { int space = elementNestLevel * 2; newline (); column = space; // track tabs only at line starts while (space > 8) { out.write ("\t"); space -= 8; } while (space > 0) { out.write (" "); space -= 2; } } private void rawWrite (char c) throws IOException { out.write (c); column++; } private void rawWrite (String s) throws SAXException, IOException { if (prettyPrinting && "default".equals (space.peek ())) { char data [] = s.toCharArray (); rawWrite (data, 0, data.length); } else { out.write (s); column += s.length (); } } // NOTE: if xhtml, the REC gives some rules about whitespace // which we could follow ... notably, many places where conformant // agents "must" consolidate/normalize whitespace. 
Line ends can // be removed there, etc. This may not be the right place to do // such mappings though. // Line buffering may help clarify algorithms and improve results. // It's likely xml:space needs more attention. private void rawWrite (char buf [], int offset, int length) throws SAXException, IOException { boolean wrap; if (prettyPrinting && space.empty ()) fatal ("stack discipline", null); wrap = prettyPrinting && "default".equals (space.peek ()); if (!wrap) { out.write (buf, offset, length); column += length; return; } // we're pretty printing and want to fill lines out only // to the desired line length. while (length > 0) { int target = lineLength - column; boolean wrote = false; // Do we even have a problem? if (target > length || noWrap) { out.write (buf, offset, length); column += length; return; } // break the line at a space character, trying to fill // as much of the line as possible. char c; for (int i = target - 1; i >= 0; i--) { if ((c = buf [offset + i]) == ' ' || c == '\t') { i++; out.write (buf, offset, i); doIndent (); offset += i; length -= i; wrote = true; break; } } if (wrote) continue; // no space character permitting break before target // line length is filled. So, take the next one. if (target < 0) target = 0; for (int i = target; i < length; i++) if ((c = buf [offset + i]) == ' ' || c == '\t') { i++; out.write (buf, offset, i); doIndent (); offset += i; length -= i; wrote = true; break; } if (wrote) continue; // no such luck. out.write (buf, offset, length); column += length; break; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -