xmlparser.java

来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 2,494 行 · 第 1/5 页

JAVA
2,494
字号
	String nname, ids[];	requireWhitespace ();	nname = readNmtoken (true);	requireWhitespace ();	// Read the external identifiers.	ids = readExternalIds (true, false);	// Register the notation.	setNotation (nname, ids);	skipWhitespace ();	require ('>');    }    /**     * Parse character data.     * <pre>     * [14] CharData ::= [^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)     * </pre>     */    private void parseCharData ()    throws Exception    {	char	c;	int	state = 0;	boolean pureWhite = false;	// assert (dataBufferPos == 0);	// are we expecting pure whitespace?  it might be dirty...	if (currentElementContent == CONTENT_ELEMENTS);	    pureWhite = true;	// always report right out of readBuffer	// to minimize (pointless) buffer copies	while (true) {	    int lineAugment = 0;	    int columnAugment = 0;	    int i;loop:	    for (i = readBufferPos; i < readBufferLength; i++) {		switch (c = readBuffer [i]) {		case '\n':		    lineAugment++;		    columnAugment = 0;		    // pureWhite unmodified		    break;		case '\r':	// should not happen!!		case '\t':		case ' ':		    // pureWhite unmodified		    columnAugment++;		    break;		case '&':		case '<':		    columnAugment++;		    // pureWhite unmodified		    // CLEAN end of text sequence		    state = 1;		    break loop;		case ']':		    // that's not a whitespace char, and		    // can not terminate pure whitespace either		    pureWhite = false;		    if ((i + 2) < readBufferLength) {			if (readBuffer [i + 1] == ']'				&& readBuffer [i + 2] == '>') {			    // ERROR end of text sequence			    state = 2;			    break loop;			}		    } else {			// FIXME missing two end-of-buffer cases		    }		    columnAugment++;		    break;		default:		    if (c < 0x0020 || c > 0xFFFD)			error ("illegal XML character U+"				+ Integer.toHexString (c));		    // that's not a whitespace char		    pureWhite = false;		    columnAugment++;		}	    }	    // report text thus far	    if (lineAugment > 0) {		line += lineAugment;		column = columnAugment;	    } else {		column += columnAugment;	    }	    // report characters/whitspace	    int		length = i - readBufferPos;	    if (length != 0) {		if (pureWhite)		    handler.ignorableWhitespace (readBuffer,		    		readBufferPos, length);		else		    handler.charData (readBuffer, readBufferPos, length);		readBufferPos = i;	    }	    	    if (state != 0)		break;	    // fill next buffer from this entity, or	    // pop stack and continue with previous entity	    unread (readCh ());	}	// finish, maybe with error	if (state != 1)	// finish, no error	    error ("character data may not contain ']]>'");    }    //////////////////////////////////////////////////////////////////////    // High-level reading and scanning methods.    //////////////////////////////////////////////////////////////////////    /**     * Require whitespace characters.     */    private void requireWhitespace ()    throws SAXException, IOException    {	char c = readCh ();	if (isWhitespace (c)) {	    skipWhitespace ();	} else {	    error ("whitespace required", c, null);	}    }    /**     * Skip whitespace characters.     * <pre>     * [3] S ::= (#x20 | #x9 | #xd | #xa)+     * </pre>     */    private void skipWhitespace ()    throws SAXException, IOException    {	// Start with a little cheat.  Most of	// the time, the white space will fall	// within the current read buffer; if	// not, then fall through.	if (USE_CHEATS) {	    int lineAugment = 0;	    int columnAugment = 0;loop:	    for (int i = readBufferPos; i < readBufferLength; i++) {		switch (readBuffer [i]) {		case ' ':		case '\t':		case '\r':		    columnAugment++;		    break;		case '\n':		    lineAugment++;		    columnAugment = 0;		    break;		case '%':		    if (expandPE)			break loop;		    // else fall through...		default:		    readBufferPos = i;		    if (lineAugment > 0) {			line += lineAugment;			column = columnAugment;		    } else {			column += columnAugment;		    }		    return;		}	    }	}	// OK, do it the slow way.	char c = readCh ();	while (isWhitespace (c)) {	    c = readCh ();	}	unread (c);    }    /**     * Read a name or (when parsing an enumeration) name token.     * <pre>     * [5] Name ::= (Letter | '_' | ':') (NameChar)*     * [7] Nmtoken ::= (NameChar)+     * </pre>     */    private String readNmtoken (boolean isName)    throws SAXException, IOException    {	char c;	if (USE_CHEATS) {loop:	    for (int i = readBufferPos; i < readBufferLength; i++) {		c = readBuffer [i];		switch (c) {		  case '%':		    if (expandPE)			break loop;		    // else fall through...		    // What may legitimately come AFTER a name/nmtoken?		  case '<': case '>': case '&':		  case ',': case '|': case '*': case '+': case '?':		  case ')':		  case '=':		  case '\'': case '"':		  case '[':		  case ' ': case '\t': case '\r': case '\n':		  case ';':		  case '/':		    int start = readBufferPos;		    if (i == start)			error ("name expected", readBuffer [i], null);		    readBufferPos = i;		    return intern (readBuffer, start, i - start);		  default:// FIXME ... per IBM's OASIS test submission, these://   ?		U+06dd // REJECT//   BaseChar	U+0132 U+0133 U+013F U+0140 U+0149 U+017F U+01C4 U+01CC//		U+01F1 U+01F3 U+0E46 U+1011 U+1104 U+1108 U+110A U+110D//		U+113B U+113F U+1141 U+114D U+114F U+1151 U+1156 U+1162//		U+1164 U+1166 U+116B U+116F U+1174 U+119F U+11AC U+11B6//		U+11B9 U+11BB U+11C3 U+11F1 U+212F U+0587//   Combining	U+309B		    // punt on exact tests from Appendix A; approximate		    // them using the Unicode ID start/part rules		    if (i == readBufferPos && isName) {			if (!Character.isUnicodeIdentifierStart (c)				&& c != ':' && c != '_')			    error ("Not a name start character, U+"				  + Integer.toHexString (c));		    } else if (!Character.isUnicodeIdentifierPart (c)			    && c != '-' && c != ':' && c != '_' && c != '.'			    && !isExtender (c))			error ("Not a name character, U+"				+ Integer.toHexString (c));		}	    }	}	nameBufferPos = 0;	// Read the first character.loop:	while (true) {	    c = readCh ();	    switch (c) {	    case '%':	    case '<': case '>': case '&':	    case ',': case '|': case '*': case '+': case '?':	    case ')':	    case '=':	    case '\'': case '"':	    case '[':	    case ' ': case '\t': case '\n': case '\r':	    case ';':	    case '/':		unread (c);		if (nameBufferPos == 0) {		    error ("name expected");		}		// punt on exact tests from Appendix A, but approximate them		if (isName			&& !Character.isUnicodeIdentifierStart (				nameBuffer [0])			&& ":_".indexOf (nameBuffer [0]) == -1)		    error ("Not a name start character, U+"			      + Integer.toHexString (nameBuffer [0]));		String s = intern (nameBuffer, 0, nameBufferPos);		nameBufferPos = 0;		return s;	    default:		// punt on exact tests from Appendix A, but approximate them		if ((nameBufferPos != 0 || !isName)			&& !Character.isUnicodeIdentifierPart (c)			&& ":-_.".indexOf (c) == -1			&& !isExtender (c))		    error ("Not a name character, U+"			    + Integer.toHexString (c));		if (nameBufferPos >= nameBuffer.length)		    nameBuffer =			(char[]) extendArray (nameBuffer,				    nameBuffer.length, nameBufferPos);		nameBuffer [nameBufferPos++] = c;	    }	}    }    private static boolean isExtender (char c)    {	// [88] Extender ::= ...	return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387	       || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005	       || (c >= 0x3031 && c <= 0x3035)	       || (c >= 0x309d && c <= 0x309e)	       || (c >= 0x30fc && c <= 0x30fe);    }    /**     * Read a literal.  With matching single or double quotes as     * delimiters (and not embedded!) this is used to parse:     * <pre>     *	[9] EntityValue ::= ... ([^%&amp;] | PEReference | Reference)* ...     *	[10] AttValue ::= ... ([^<&] | Reference)* ...     *	[11] SystemLiteral ::= ... (URLchar - "'")* ...     *	[12] PubidLiteral ::= ... (PubidChar - "'")* ...     * </pre>     * as well as the quoted strings in XML and text declarations     * (for version, encoding, and standalone) which have their     * own constraints.     */    private String readLiteral (int flags)    throws SAXException, IOException    {	char	delim, c;	int	startLine = line;	boolean	saved = expandPE;	boolean	savedReport = doReport;	// Find the first delimiter.	delim = readCh ();	if (delim != '"' && delim != '\'') {	    error ("expected '\"' or \"'\"", delim, null);	    return null;	}	inLiteral = true;	if ((flags & LIT_DISABLE_PE) != 0)	    expandPE = false;	doReport = false;	// Each level of input source has its own buffer; remember	// ours, so we won't read the ending delimiter from any	// other input source, regardless of entity processing.	char ourBuf [] = readBuffer;	// Read the literal.	try {	    c = readCh ();loop:	    while (! (c == delim && readBuffer == ourBuf)) {		switch (c) {		    // attributes and public ids are normalized		    // in almost the same ways		case '\n':		case '\r':		    if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)			c = ' ';		    break;		case '\t':		    if ((flags & LIT_ATTRIBUTE) != 0)			c = ' ';		    break;		case '&':		    c = readCh ();		    // Char refs are expanded immediately, except for		    // all the cases where it's deferred.		    if (c == '#') {			if ((flags & LIT_DISABLE_CREF) != 0) {			    dataBufferAppend ('&');			    break;			}      parseCharRef (false /* Do not do flushDataBuffer */);			// exotic WFness risk: this is an entity literal,			// dataBuffer [dataBufferPos - 1] == '&', and			// following chars are a _partial_ entity/char ref		    // It looks like an entity ref ...		    } else {			unread (c);			// Expand it?			if ((flags & LIT_ENTITY_REF) > 0) {			    parseEntityRef (false);			// Is it just data?			} else if ((flags & LIT_DISABLE_EREF) != 0) {			    dataBufferAppend ('&');			// OK, it will be an entity ref -- expanded later.			} else {			    String name = readNmtoken (true);			    require (';');			    dataBufferAppend ('&');			    dataBufferAppend (name);			    dataBufferAppend (';');			}		    }		    c = readCh ();		    continue loop;		case '<':		    // and why?  Perhaps so "&foo;" expands the same		    // inside and outside an attribute?		    if ((flags & LIT_ATTRIBUTE) != 0)			error ("attribute values may not contain '<'");		    break;		// We don't worry about case '%' and PE refs, readCh does.		default:		    break;		}		dataBufferAppend (c);		c = readCh ();	    }	} catch (EOFException e) {	    error ("end of input while looking for delimiter (started on line "		   + startLine + ')', null, new Character (delim).toString ());	}	inLiteral = false;	expandPE = saved;	doReport = savedReport;	// Normalise whitespace if necessary.	if ((flags & LIT_NORMALIZE) > 0) {	    dataBufferNormalize ();	}	// Return the value.	return dataBufferToString ();    }    /**     * Try reading external identifiers.     * A system identifier is not required for notations.     * @param inNotation Are we parsing a notation decl?     * @param isSubset Parsing external subset decl (may be omitted)?     * @return A three-member String array containing the identifiers,     *	or nulls. Order: public, system, baseURI.     */    private String[] readExternalIds (boolean inNotation, boolean isSubset)    throws Exception    {	char	c;	String	ids[] = new String [3];	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;	if (tryRead ("PUBLIC")) {	    requireWhitespace ();	    ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags);	    if (inNotation) {		skipWhitespace ();		c = readCh ();		unread (c);		if (c == '"' || c == '\'') {		    ids [1] = readLiteral (flags);		}	    } else {		requireWhitespace ();		ids [1] = readLiteral (flags);	    }	    for (int i = 0; i < ids [0].length (); i++) {		c = ids [0].charAt (i);		if (c >= 'a' && c <= 'z')		    continue;		if (c >= 'A' && c <= 'Z')		    continue;		if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1)		    continue;		error ("illegal PUBLIC id character U+"			+ Integer.toHexString (c));	    }	} else if (tryRead ("SYSTEM")) {	    requireWhitespace ();	    ids [1] = readLiteral (flags);	} else if (!isSubset) 		error ("missing SYSTEM or PUBLIC keyword");	if (i

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?