📄 xmlparser.java

📁 好东西啊！你看看就知道了
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
		    columnAugment = 0;
		    break;
		case '%':
		    if (expandPE)
			break loop;
		    // else fall through...
		default:
		    // Stop scanning here: record the buffer position and
		    // fold the pending line/column adjustments into the
		    // current line and column.
		    readBufferPos = i;
		    if (lineAugment > 0) {
			line += lineAugment;
			column = columnAugment;
		    } else {
			column += columnAugment;
		    }
		    return;
		}
	    }
	}

	// OK, do it by the book: read one character at a time until a
	// non-whitespace character shows up, then push it back.
	char c = readCh ();
	while (isWhitespace (c)) {
	    c = readCh ();
	}
	unread (c);
    }

    /**
     * Read a name or (when parsing an enumeration) name token.
     * <pre>
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     * [7] Nmtoken ::= (NameChar)+
     * </pre>
     *
     * <p>Character classes are approximated with the Unicode
     * identifier start/part tests plus a few explicitly allowed
     * punctuation characters and the extender list; they are not
     * the exact tables from the XML specification.
     *
     * @param isName true to apply the name-start test to the first
     *	character (Name); false to accept any name character in the
     *	first position (Nmtoken).
     * @return the interned name or name token.
     */
    private String readNmtoken (boolean isName)
    throws SAXException, IOException
    {
	char c;

	// Fast path: scan the current read buffer in place.  When a
	// terminator is found, the span from readBufferPos up to (not
	// including) the terminator is interned and returned.  If the
	// buffer runs out first, or a '%' is seen while expandPE is set,
	// readBufferPos is left untouched and the slow path below
	// re-reads the same characters.
	if (USE_CHEATS) {
loop:
	    for (int i = readBufferPos; i < readBufferLength; i++) {
		c = readBuffer [i];
		switch (c) {
		  case '%':
		    if (expandPE)
			break loop;
		    // else fall through...

		    // What may legitimately come AFTER a name/nmtoken?
		  case '<': case '>': case '&':
		  case ',': case '|': case '*': case '+': case '?':
		  case ')':
		  case '=':
		  case '\'': case '"':
		  case '[':
		  case ' ': case '\t': case '\r': case '\n':
		  case ';':
		  case '/':
		    int start = readBufferPos;
		    // A terminator in the very first position means there
		    // was no name at all.
		    if (i == start)
			error ("name expected", readBuffer [i], null);
		    readBufferPos = i;
		    return intern (readBuffer, start, i - start);

		  default:
		    // punt on exact tests from Appendix A; approximate
		    // them using the Unicode ID start/part rules
		    if (i == readBufferPos && isName) {
			if (!Character.isUnicodeIdentifierStart (c)
				&& c != ':' && c != '_')
			    error ("Not a name start character, U+"
				  + Integer.toHexString (c));
		    } else if (!Character.isUnicodeIdentifierPart (c)
			    && c != '-' && c != ':' && c != '_' && c != '.'
			    && !isExtender (c))
			error ("Not a name character, U+"
				+ Integer.toHexString (c));
		}
	    }
	}

	nameBufferPos = 0;

	// Slow path: collect characters from readCh () into nameBuffer
	// until one of the terminator characters is read.
loop:
	while (true) {
	    c = readCh ();
	    switch (c) {
	    case '%':
	    case '<': case '>': case '&':
	    case ',': case '|': case '*': case '+': case '?':
	    case ')':
	    case '=':
	    case '\'': case '"':
	    case '[':
	    case ' ': case '\t': case '\n': case '\r':
	    case ';':
	    case '/':
		// The terminator is not part of the name; push it back.
		unread (c);
		if (nameBufferPos == 0) {
		    error ("name expected");
		}
		// punt on exact tests from Appendix A, but approximate them
		// (the name-start test is deferred until the whole token
		// has been collected)
		if (isName
			&& !Character.isUnicodeIdentifierStart (
				nameBuffer [0])
			&& ":_".indexOf (nameBuffer [0]) == -1)
		    error ("Not a name start character, U+"
			      + Integer.toHexString (nameBuffer [0]));
		String s = intern (nameBuffer, 0, nameBufferPos);
		nameBufferPos = 0;
		return s;
	    default:
		// punt on exact tests from Appendix A, but approximate them
		// (the first character of a Name is exempt here because it
		// is checked against the name-start rule above)
		if ((nameBufferPos != 0 || !isName)
			&& !Character.isUnicodeIdentifierPart (c)
			&& ":-_.".indexOf (c) == -1
			&& !isExtender (c))
		    error ("Not a name character, U+"
			    + Integer.toHexString (c));
		// Grow the name buffer when it is full.
		if (nameBufferPos >= nameBuffer.length)
		    nameBuffer =
			(char[]) extendArray (nameBuffer,
				    nameBuffer.length, nameBufferPos);
		nameBuffer [nameBufferPos++] = c;
	    }
	}
    }

    /**
     * Test whether a character is one of the "extender" characters
     * that the name checks above accept in addition to the Unicode
     * identifier-part characters.
     *
     * @param c the character to test.
     * @return true if the character is in the list below.
     */
    private static boolean isExtender (char c)
    {
	// [89] Extender ::= ...   (XML 1.0 character classes)
	return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
	       || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005
	       || (c >= 0x3031 && c <= 0x3035)
	       || (c >= 0x309d && c <= 0x309e)
	       || (c >= 0x30fc && c <= 0x30fe);
    }

    /**
     * Read a literal.  With matching single or double quotes as
     * delimiters (and not embedded!) this is used to parse:
     * <pre>
     *	[9] EntityValue ::= ... ([^%&amp;] | PEReference | Reference)* ...
     *	[10] AttValue ::= ... ([^<&] | Reference)* ...
     *	[11] SystemLiteral ::= ... (URLchar - "'")* ...
*	[12] PubidLiteral ::= ... (PubidChar - "'")* ...     * </pre>     * as well as the quoted strings in XML and text declarations     * (for version, encoding, and standalone) which have their     * own constraints.     */    private String readLiteral (int flags)    throws SAXException, IOException    {	char	delim, c;	int	startLine = line;	boolean	saved = expandPE;	// Find the first delimiter.	delim = readCh ();	if (delim != '"' && delim != '\'' && delim != (char) 0) {	    error ("expected '\"' or \"'\"", delim, null);	    return null;	}	inLiteral = true;	if ((flags & LIT_DISABLE_PE) != 0)	    expandPE = false;	// Each level of input source has its own buffer; remember	// ours, so we won't read the ending delimiter from any	// other input source, regardless of entity processing.	char ourBuf [] = readBuffer;	// Read the literal.	try {	    c = readCh ();loop:	    while (! (c == delim && readBuffer == ourBuf)) {		switch (c) {		    // Can't escape this normalization for attributes		case '\n':		case '\r':		case '\t':		    if ((flags & LIT_ATTRIBUTE) != 0)			c = ' ';		    break;		case '&':		    c = readCh ();		    // Char refs are expanded immediately, except for		    // all the cases where it's deferred.		    if (c == '#') {			if ((flags & LIT_DISABLE_CREF) != 0) {			    dataBufferAppend ('&');			    dataBufferAppend ('#');			    continue;			}			parseCharRef ();		    // It looks like an entity ref ...		    } else {			unread (c);			// Expand it?			if ((flags & LIT_ENTITY_REF) > 0) {			    parseEntityRef (false);			// Is it just data?			} else if ((flags & LIT_DISABLE_EREF) != 0) {			    dataBufferAppend ('&');			// OK, it will be an entity ref -- expanded later.			
} else {			    String name = readNmtoken (true);			    require (';');			    if ((flags & LIT_ENTITY_CHECK) != 0				    && getEntityType (name) ==					    ENTITY_UNDECLARED) {				error ("General entity '" + name				    + "' must be declared before use");			    }			    dataBufferAppend ('&');			    dataBufferAppend (name);			    dataBufferAppend (';');			}		    }		    c = readCh ();		    continue loop;		case '<':		    // and why?  Perhaps so "&foo;" expands the same		    // inside and outside an attribute?		    if ((flags & LIT_ATTRIBUTE) != 0)			error ("attribute values may not contain '<'");		    break;		// We don't worry about case '%' and PE refs, readCh does.		default:		    break;		}		dataBufferAppend (c);		c = readCh ();	    }	} catch (EOFException e) {	    error ("end of input while looking for delimiter (started on line "		   + startLine + ')', null, new Character (delim).toString ());	}	inLiteral = false;	expandPE = saved;	// Normalise whitespace if necessary.	if ((flags & LIT_NORMALIZE) > 0) {	    dataBufferNormalize ();	}	// Return the value.	return dataBufferToString ();    }    /**     * Try reading external identifiers.     * A system identifier is not required for notations.     * @param inNotation Are we in a notation?     * @return A two-member String array containing the identifiers.     
*/    private String[] readExternalIds (boolean inNotation)    throws Exception    {	char	c;	String	ids[] = new String [2];	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;	if (tryRead ("PUBLIC")) {	    requireWhitespace ();	    ids [0] = readLiteral (LIT_NORMALIZE | flags);	    if (inNotation) {		skipWhitespace ();		c = readCh ();		unread (c);		if (c == '"' || c == '\'') {		    ids [1] = readLiteral (flags);		}	    } else {		requireWhitespace ();		ids [1] = readLiteral (flags);	    }	    for (int i = 0; i < ids [0].length (); i++) {		c = ids [0].charAt (i);		if (c >= 'a' && c <= 'z')		    continue;		if (c >= 'A' && c <= 'Z')		    continue;		if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1)		    continue;		error ("illegal PUBLIC id character U+"			+ Integer.toHexString (c));	    }	} else if (tryRead ("SYSTEM")) {	    requireWhitespace ();	    ids [1] = readLiteral (flags);	}	// XXX should normalize system IDs as follows:	// - Convert to UTF-8	// - Map reserved and non-ASCII characters to %HH	return ids;    }    /**     * Test if a character is whitespace.     * <pre>     * [3] S ::= (#x20 | #x9 | #xd | #xa)+     * </pre>     * @param c The character to test.     * @return true if the character is whitespace.     */    private final boolean isWhitespace (char c)    {	if (c > 0x20)	    return false;	if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d)	    return true;	return false;	// illegal ...    }    //////////////////////////////////////////////////////////////////////    // Utility routines.    //////////////////////////////////////////////////////////////////////    /**     * Add a character to the data buffer.     */    private void dataBufferAppend (char c)    {	// Expand buffer if necessary.	if (dataBufferPos >= dataBuffer.length)	    dataBuffer =		(char[]) extendArray (dataBuffer,			dataBuffer.length, dataBufferPos);	dataBuffer [dataBufferPos++] = c;    }    /**     * Add a string to the data buffer.     
*/    private void dataBufferAppend (String s)    {	dataBufferAppend (s.toCharArray (), 0, s.length ());    }    /**     * Append (part of) a character array to the data buffer.     */    private void dataBufferAppend (char ch[], int start, int length)    {	dataBuffer = (char[])		extendArray (dataBuffer, dataBuffer.length,				    dataBufferPos + length);	System.arraycopy (ch, start, dataBuffer, dataBufferPos, length);	dataBufferPos += length;    }    /**     * Normalise whitespace in the data buffer.     */    private void dataBufferNormalize ()    {	int i = 0;	int j = 0;	int end = dataBufferPos;	// Skip whitespace at the start.	while (j < end && isWhitespace (dataBuffer [j])) {	    j++;	}	// Skip whitespace at the end.	while (end > j && isWhitespace (dataBuffer [end - 1])) {	    end --;	}	// Start copying to the left.	while (j < end) {	    char c = dataBuffer [j++];	    // Normalise all other whitespace to	    // a single space.	    if (isWhitespace (c)) {		while (j < end && isWhitespace (dataBuffer [j++])) {}		dataBuffer [i++] = ' ';		dataBuffer [i++] = dataBuffer [j - 1];	    } else {		dataBuffer [i++] = c;	    }	}	// The new length is <= the old one.	dataBufferPos = i;    }    /**     * Convert the data buffer to a string.     */    private String dataBufferToString ()    {	String s = new String (dataBuffer, 0, dataBufferPos);	dataBufferPos = 0;	return s;    }    /**     * Flush the contents of the data buffer to the handler, as     * appropriate, and reset the buffer for new input.     
*/    private void dataBufferFlush ()    throws SAXException    {	if (currentElementContent == CONTENT_ELEMENTS		&& dataBufferPos > 0		&& !inCDATA		) {	    // We can't just trust the buffer to be whitespace, there	    // are cases when it isn't	    for (int i = 0; i < dataBufferPos; i++) {		if (!isWhitespace (dataBuffer [i])) {		    handler.charData (dataBuffer, 0, dataBufferPos);		    dataBufferPos = 0;		}	    }	    if (dataBufferPos > 0) {		handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos);		dataBufferPos = 0;	    }	} else if (dataBufferPos > 0) {	    handler.charData (dataBuffer, 0, dataBufferPos);	    dataBufferPos = 0;	}    }    /**     * Require a string to appear, or throw an exception.     * <p><em>Precondition:</em> Entity expansion is not required.     * <p><em>Precondition:</em> data buffer has no characters that     * will get sent to the application.     */    private void require (String delim)    throws SAXException, IOException    {	int	length = delim.length ();	char	ch [];			if (length < dataBuffer.length) {	    ch = dataBuffer;	    delim.getChars (0, length, ch, 0);	} else	    ch = delim.toCharArray ();	if (USE_CHEATS		&& length <= (readBufferLength - readBufferPos)) {	    int offset = readBufferPos;	    for (int i = 0; i < length; i++, offset++)		if (ch [i] != readBuffer [offset])		    error ("required string", null, delim);	    readBufferPos = offset;	    	} else {	    for (int i = 0; i < length; i++)		require (ch [i]);	}    }    /**     * Require a character to appear, or throw an exception.     */    private void require (char delim)    throws SAXException, IOException    {	char c = readCh ();	if (c != delim) {	    error ("required character", c, new Character (delim).toString ());	}    }    /**     * Create an interned string from a character array.     
* &AElig;lfred uses this method to create an interned version     * of all names and name tokens, so that it can test equality     * with <code>==</code> instead of <code>String.equals ()</code>.     *     * <p>This is much more efficient than constructing a non-interned     * string first, and then interning it.     *     * @param ch an array of characters for building the string.     * @param start the starting position in the array.     * @param length the number of characters to place in the string.     * @return an interned string.     * @see #intern (String)     * @see java.lang.String#i
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -