📄 xmlwriter.java

📁 linux下建立JAVA虚拟机的源码KAFFE
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
	int	off = 0, len = buf.length;	// we can't add line breaks to attribute/entity/... values	noWrap = true;	rawWrite ('"');	escapeChars (buf, off, len, code);	rawWrite ('"');	noWrap = false;    }        // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1    // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).    // Codes 128-159 have no assigned values.    private static final String HTMLlat1x [] = {	// 160	"nbsp", "iexcl", "cent", "pound", "curren",	"yen", "brvbar", "sect", "uml", "copy",	// 170	"ordf", "laquo", "not", "shy", "reg",	"macr", "deg", "plusmn", "sup2", "sup3",	// 180	"acute", "micro", "para", "middot", "cedil",	"sup1", "ordm", "raquo", "frac14", "frac12",	// 190	"frac34", "iquest", "Agrave", "Aacute", "Acirc",	"Atilde", "Auml", "Aring", "AElig", "Ccedil",	// 200	"Egrave", "Eacute", "Ecirc", "Euml", "Igrave",	"Iacute", "Icirc", "Iuml", "ETH", "Ntilde",	// 210	"Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",	"times", "Oslash", "Ugrave", "Uacute", "Ucirc",	// 220	"Uuml", "Yacute", "THORN", "szlig", "agrave",	"aacute", "acirc", "atilde", "auml", "aring",	// 230	"aelig", "ccedil", "egrave", "eacute", "ecirc",	"euml", "igrave", "iacute", "icirc", "iuml",	// 240	"eth", "ntilde", "ograve", "oacute", "ocirc",	"otilde", "ouml", "divide", "oslash", "ugrave",	// 250	"uacute", "ucirc", "uuml", "yacute", "thorn",	"yuml"    };    // From "HTMLsymbolx.ent" ... some of the symbols that    // we can conveniently handle.  Entities for the Greek.    // alphabet (upper and lower cases) are compact.    private static final String HTMLsymbolx_GR [] = {	// 913	"Alpha", "Beta", "Gamma", "Delta", "Epsilon",	"Zeta", "Eta", "Theta", "Iota", "Kappa",	// 923	"Lambda", "Mu", "Nu", "Xi", "Omicron",	"Pi", "Rho", null, "Sigma", "Tau",	// 933	"Upsilon", "Phi", "Chi", "Psi", "Omega"    };    private static final String HTMLsymbolx_gr [] = {	// 945	"alpha", "beta", "gamma", "delta", "epsilon",	"zeta", "eta", "theta", "iota", "kappa",	// 955	"lambda", "mu", "nu", "xi", "omicron",	"pi", "rho", "sigmaf", "sigma", "tau",	// 965	"upsilon", "phi", "chi", "psi", "omega"    };    // General routine to write text and substitute predefined    // entities (XML, and a special case for XHTML) as needed.    private void escapeChars (char buf [], int off, int len, int code)    throws SAXException, IOException    {	int	first = 0;	if (off < 0) {	    off = 0;	    len = buf.length;	}	for (int i = 0; i < len; i++) {	    String	esc;	    char 	c = buf [off + i];	    switch (c) {	      // Note that CTX_ATTRIBUTE isn't explicitly tested here;	      // all syntax delimiters are escaped in CTX_ATTRIBUTE,	      // otherwise it's similar to CTX_CONTENT	      // ampersand flags entity references; entity replacement	      // text has unexpanded references, other text doesn't.	      case '&':		if (code == CTX_ENTITY || code == CTX_UNPARSED)		    continue;		esc = "amp";		break;	      // attributes and text may NOT have literal '<', but	      // entities may have markup constructs	      case '<':		if (code == CTX_ENTITY || code == CTX_UNPARSED)		    continue;		esc = "lt";		break;	      // as above re markup constructs; but otherwise	      // except when canonicalizing, this is for consistency	      case '>':		if (code == CTX_ENTITY || code == CTX_UNPARSED)		    continue;	        esc = "gt";		break;	      case '\'':		if (code == CTX_CONTENT || code == CTX_UNPARSED)		    continue;		if (canonical)		    continue;		esc = "apos";		break;	      // needed when printing quoted attribute/entity values	      case '"':		if (code == CTX_CONTENT || code == CTX_UNPARSED)		    continue;		esc = "quot";		break;	      // make line ends work per host OS convention	      case '\n':		esc = eol;		break;	      //	      // No other characters NEED special treatment ... except	      // for encoding-specific issues, like whether the character	      // can really be represented in that encoding.	      //	      default:		//		// There are characters we can never write safely; getting		// them is an error.		//		//   (a) They're never legal in XML ... detected by range 		//	checks, and (eventually) by remerging surrogate		//	pairs on output.  (Easy error for apps to prevent.)		//		//   (b) This encoding can't represent them, and we		//	can't make reference substitution (e.g. inside		//	CDATA sections, names, PI data, etc).  (Hard for		//	apps to prevent, except by using UTF-8 or UTF-16		//	as their output encoding.)		//		// We know a very little bit about what characters		// the US-ASCII and ISO-8859-1 encodings support.  For		// other encodings we can't detect the second type of		// error at all.  (Never an issue for UTF-8 or UTF-16.)		//// FIXME:  CR in CDATA is an error; in text, turn to a char ref// FIXME:  CR/LF/TAB in attributes should become char refs		if ((c > 0xfffd)			|| ((c < 0x0020) && !((c == 0x0009)				|| (c == 0x000A) || (c == 0x000D)))			|| (((c & dangerMask) != 0)			    && (code == CTX_UNPARSED))) {		    // if case (b) in CDATA, we might end the section,		    // write a reference, then restart ... possible		    // in one DOM L3 draft.		    throw new CharConversionException (			    "Illegal or non-writable character: U+"			    + Integer.toHexString (c));		}		//		// If the output encoding represents the character		// directly, let it do so!  Else we'll escape it.		//		if ((c & dangerMask) == 0)		    continue;		esc = null;		// Avoid numeric refs where symbolic ones exist, as		// symbolic ones make more sense to humans reading!		if (xhtml) {		    // all the HTMLlat1x.ent entities		    // (all the "ISO-8859-1" characters)		    if (c >= 160 && c <= 255)			esc = HTMLlat1x [c - 160];		    // not quite half the HTMLsymbolx.ent entities		    else if (c >= 913 && c <= 937)			esc = HTMLsymbolx_GR [c - 913];		    else if (c >= 945 && c <= 969)			esc = HTMLsymbolx_gr [c - 945];		    else switch (c) {			// all of the HTMLspecialx.ent entities			case  338: esc = "OElig";	break;			case  339: esc = "oelig";	break;			case  352: esc = "Scaron";	break;			case  353: esc = "scaron";	break;			case  376: esc = "Yuml";	break;			case  710: esc = "circ";	break;			case  732: esc = "tilde";	break;			case 8194: esc = "ensp";	break;			case 8195: esc = "emsp";	break;			case 8201: esc = "thinsp";	break;			case 8204: esc = "zwnj";	break;			case 8205: esc = "zwj";		break;			case 8206: esc = "lrm";		break;			case 8207: esc = "rlm";		break;			case 8211: esc = "ndash";	break;			case 8212: esc = "mdash";	break;			case 8216: esc = "lsquo";	break;			case 8217: esc = "rsquo";	break;			case 8218: esc = "sbquo";	break;			case 8220: esc = "ldquo";	break;			case 8221: esc = "rdquo";	break;			case 8222: esc = "bdquo";	break;			case 8224: esc = "dagger";	break;			case 8225: esc = "Dagger";	break;			case 8240: esc = "permil";	break;			case 8249: esc = "lsaquo";	break;			case 8250: esc = "rsaquo";	break;			case 8364: esc = "euro";	break;			// the other HTMLsymbox.ent entities			case  402: esc = "fnof";	break;			case  977: esc = "thetasym";	break;			case  978: esc = "upsih";	break;			case  982: esc = "piv";		break;			case 8226: esc = "bull";	break;			case 8230: esc = "hellip";	break;			case 8242: esc = "prime";	break;			case 8243: esc = "Prime";	break;			case 8254: esc = "oline";	break;			case 8260: esc = "frasl";	break;			case 8472: esc = "weierp";	break;			case 8465: esc = "image";	break;			case 8476: esc = "real";	break;			case 8482: esc = "trade";	break;			case 8501: esc = "alefsym";	break;			case 8592: esc = "larr";	break;			case 8593: esc = "uarr";	break;			case 8594: esc = "rarr";	break;			case 8595: esc = "darr";	break;			case 8596: esc = "harr";	break;			case 8629: esc = "crarr";	break;			case 8656: esc = "lArr";	break;			case 8657: esc = "uArr";	break;			case 8658: esc = "rArr";	break;			case 8659: esc = "dArr";	break;			case 8660: esc = "hArr";	break;			case 8704: esc = "forall";	break;			case 8706: esc = "part";	break;			case 8707: esc = "exist";	break;			case 8709: esc = "empty";	break;			case 8711: esc = "nabla";	break;			case 8712: esc = "isin";	break;			case 8713: esc = "notin";	break;			case 8715: esc = "ni";		break;			case 8719: esc = "prod";	break;			case 8721: esc = "sum";		break;			case 8722: esc = "minus";	break;			case 8727: esc = "lowast";	break;			case 8730: esc = "radic";	break;			case 8733: esc = "prop";	break;			case 8734: esc = "infin";	break;			case 8736: esc = "ang";		break;			case 8743: esc = "and";		break;			case 8744: esc = "or";		break;			case 8745: esc = "cap";		break;			case 8746: esc = "cup";		break;			case 8747: esc = "int";		break;			case 8756: esc = "there4";	break;			case 8764: esc = "sim";		break;			case 8773: esc = "cong";	break;			case 8776: esc = "asymp";	break;			case 8800: esc = "ne";		break;			case 8801: esc = "equiv";	break;			case 8804: esc = "le";		break;			case 8805: esc = "ge";		break;			case 8834: esc = "sub";		break;			case 8835: esc = "sup";		break;			case 8836: esc = "nsub";	break;			case 8838: esc = "sube";	break;			case 8839: esc = "supe";	break;			case 8853: esc = "oplus";	break;			case 8855: esc = "otimes";	break;			case 8869: esc = "perp";	break;			case 8901: esc = "sdot";	break;			case 8968: esc = "lceil";	break;			case 8969: esc = "rceil";	break;			case 8970: esc = "lfloor";	break;			case 8971: esc = "rfloor";	break;			case 9001: esc = "lang";	break;			case 9002: esc = "rang";	break;			case 9674: esc = "loz";		break;			case 9824: esc = "spades";	break;			case 9827: esc = "clubs";	break;			case 9829: esc = "hearts";	break;			case 9830: esc = "diams";	break;		    }		}		// else escape with numeric char refs		if (esc == null) {		    stringBuf.setLength (0);		    stringBuf.append ("#x");		    stringBuf.append (Integer.toHexString (c).toUpperCase ());		    esc = stringBuf.toString ();		    // FIXME:  We don't write surrogate pairs correctly.		    // They should work as one ref per character, since		    // each pair is one character.  For reading back into		    // Unicode, it matters beginning in Unicode 3.1 ...		}		break;	    }	    if (i != first)		rawWrite (buf, off + first, i - first);	    first = i + 1;	    if (esc == eol)		newline ();	    else {		rawWrite ('&');		rawWrite (esc);		rawWrite (';');	    }	}	if (first < len)	    rawWrite (buf, off + first, len - first);    }    private void newline ()    throws SAXException, IOException    {	out.write (eol);	column = 0;    }    private void doIndent ()    throws SAXException, IOException    {	int	space = elementNestLevel * 2;	newline ();	column = space;	// track tabs only at line starts	while (space > 8) {	    out.write ("\t");	    space -= 8;	}	while (space > 0) {	    out.write ("  ");	    space -= 2;	}    }    private void rawWrite (char c)    throws IOException    {	out.write (c);	column++;    }    private void rawWrite (String s)    throws SAXException, IOException    {	if (prettyPrinting && "default".equals (space.peek ())) {	    char data [] = s.toCharArray ();	    rawWrite (data, 0, data.length);	} else {	    out.write (s);	    column += s.length ();	}    }    // NOTE:  if xhtml, the REC gives some rules about whitespace    // which we could follow ... notably, many places where conformant    // agents "must" consolidate/normalize whitespace.  Line ends can    // be removed there, etc.  This may not be the right place to do    // such mappings though.    // Line buffering may help clarify algorithms and improve results.    // It's likely xml:space needs more attention.    private void rawWrite (char buf [], int offset, int length)    throws SAXException, IOException    {	boolean		wrap;	if (prettyPrinting && space.empty ())	    fatal ("stack discipline", null);	wrap = prettyPrinting && "default".equals (space.peek ());	if (!wrap) {	    out.write (buf, offset, length);	    column += length;	    return;	}	// we're pretty printing and want to fill lines out only	// to the desired line length.	while (length > 0) {	    int		target = lineLength - column;	    boolean	wrote = false;	    // Do we even have a problem?	    if (target > length || noWrap) {		out.write (buf, offset, length);		column += length;		return;	    }	    // break the line at a space character, trying to fill	    // as much of the line as possible.	    char	c;	    for (int i = target - 1; i >= 0; i--) {		if ((c = buf [offset + i]) == ' ' || c == '\t') {		    i++;		    out.write (buf, offset, i);		    doIndent ();		    offset += i;		    length -= i;		    wrote = true;		    break;		}	    }	    if (wrote)		continue;	    	    // no space character permitting break before target	    // line length is filled.  So, take the next one.	    if (target < 0)		target = 0;	    for (int i = target; i < length; i++)		if ((c = buf [offset + i]) == ' ' || c == '\t') {		    i++;		    out.write (buf, offset, i);		    doIndent ();		    offset += i;		    length -= i;		    wrote = true;		    break;		}	    if (wrote)		continue;	    	    // no such luck.	    out.write (buf, offset, length);	    column += length;	    break;	}    }}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -