📄 xmlparser.java

📁 好东西啊！你看看就知道了
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    {	int	valueType = ATTRIBUTE_DEFAULT_SPECIFIED;	String	value = null;	int	flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK;	// Note: char refs not checked here, and input not normalized,	// since it's done correctly later when we actually expand any	// entity refs.  We ought to report char ref syntax errors now,	// but don't.  Cost: unused defaults mean unreported WF errs.		// LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace	// chars to spaces (doesn't matter when that's done if it doesn't	// interfere with char refs expanding to whitespace).	if (tryRead ('#')) {	    if (tryRead ("FIXED")) {		valueType = ATTRIBUTE_DEFAULT_FIXED;		requireWhitespace ();		value = readLiteral (flags);	    } else if (tryRead ("REQUIRED")) {		valueType = ATTRIBUTE_DEFAULT_REQUIRED;	    } else if (tryRead ("IMPLIED")) {		valueType = ATTRIBUTE_DEFAULT_IMPLIED;	    } else {		error ("illegal keyword for attribute default value");	    }	} else	    value = readLiteral (flags);	setAttribute (elementName, name, type, enum, value, valueType);    }    /**     * Parse a conditional section.     * <pre>     * [61] conditionalSect ::= includeSect || ignoreSect     * [62] includeSect ::= '&lt;![' S? 'INCLUDE' S? '['     *		extSubsetDecl ']]&gt;'     * [63] ignoreSect ::= '&lt;![' S? 'IGNORE' S? '['     *		ignoreSectContents* ']]&gt;'     * [64] ignoreSectContents ::= Ignore     *		('&lt;![' ignoreSectContents* ']]&gt;' Ignore )*     * [65] Ignore ::= Char* - (Char* ( '&lt;![' | ']]&gt;') Char* )     * </pre>     * <p> NOTE: the '&gt;![' has already been read.     */    private void parseConditionalSect ()    throws Exception    {	skipWhitespace ();	if (tryRead ("INCLUDE")) {	    skipWhitespace ();	    require ('[');	    skipWhitespace ();	    while (!tryRead ("]]>")) {		parseMarkupdecl ();		skipWhitespace ();	    }	} else if (tryRead ("IGNORE")) {	    skipWhitespace ();	    require ('[');	    int nesting = 1;	    char c;	    expandPE = false;	    for (int nest = 1; nest > 0;) {		c = readCh ();		switch (c) {		case '<':		    if (tryRead ("![")) {			nest++;		    }		case ']':		    if (tryRead ("]>")) {			nest--;		    }		}	    }	    expandPE = true;	} else {	    error ("conditional section must begin with INCLUDE or IGNORE");	}    }    /**     * Read and interpret a character reference.     * <pre>     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'     * </pre>     * <p>NOTE: the '&#' has already been read.     */    private void parseCharRef ()    throws SAXException, IOException    {	int value = 0;	char c;	if (tryRead ('x')) {loop1:	    while (true) {		c = readCh ();		switch (c) {		case '0':		case '1':		case '2':		case '3':		case '4':		case '5':		case '6':		case '7':		case '8':		case '9':		case 'a':		case 'A':		case 'b':		case 'B':		case 'c':		case 'C':		case 'd':		case 'D':		case 'e':		case 'E':		case 'f':		case 'F':		    value *= 16;		    value += Integer.parseInt (new Character (c).toString (),				    16);		    break;		case ';':		    break loop1;		default:		    error ("illegal character in character reference", c, null);		    break loop1;		}	    }	} else {loop2:	    while (true) {		c = readCh ();		switch (c) {		case '0':		case '1':		case '2':		case '3':		case '4':		case '5':		case '6':		case '7':		case '8':		case '9':		    value *= 10;		    value += Integer.parseInt (new Character (c).toString (),				    10);		    break;		case ';':		    break loop2;		default:		    error ("illegal character in character reference", c, null);		    break loop2;		}	    }	}	// check for character refs being legal XML	if ((value < 0x0020		&& ! (value == '\n' || value == '\t' || value == '\r'))		|| (value >= 0xD800 && value <= 0xDFFF)		|| value == 0xFFFE || value == 0xFFFF		|| value > 0x0010ffff)	    error ("illegal XML character reference U+"		    + Integer.toHexString (value));	// Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz	//  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:	if (value <= 0x0000ffff) {	    // no surrogates needed	    dataBufferAppend ((char) value);	} else if (value <= 0x0010ffff) {	    value -= 0x10000;	    // > 16 bits, surrogate needed	    dataBufferAppend ((char) (0xd800 | (value >> 10)));	    dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff)));	} else {	    // too big for surrogate	    error ("character reference " + value + " is too large for UTF-16",		   new Integer (value).toString (), null);	}    }    /**     * Parse and expand an entity reference.     * <pre>     * [68] EntityRef ::= '&' Name ';'     * </pre>     * <p>NOTE: the '&amp;' has already been read.     * @param externalAllowed External entities are allowed here.     */    private void parseEntityRef (boolean externalAllowed)    throws SAXException, IOException    {	String name;	name = readNmtoken (true);	require (';');	switch (getEntityType (name)) {	case ENTITY_UNDECLARED:	    error ("reference to undeclared entity", name, null);	    break;	case ENTITY_INTERNAL:	    pushString (name, getEntityValue (name));	    break;	case ENTITY_TEXT:	    if (externalAllowed) {		pushURL (name, getEntityPublicId (name),			 getEntitySystemId (name),			 null, null, null);	    } else {		error ("reference to external entity in attribute value.",			name, null);	    }	    break;	case ENTITY_NDATA:	    if (externalAllowed) {		error ("unparsed entity reference in content", name, null);	    } else {		error ("reference to external entity in attribute value.",			name, null);	    }	    break;	}    }    /**     * Parse and expand a parameter entity reference.     * <pre>     * [69] PEReference ::= '%' Name ';'     * </pre>     * <p>NOTE: the '%' has already been read.     */    private void parsePEReference ()    throws SAXException, IOException    {	String name;	name = "%" + readNmtoken (true);	require (';');	switch (getEntityType (name)) {	case ENTITY_UNDECLARED:	    // this is a validity problem, not a WFC violation ... but	    // we should disable handling of all subsequent declarations	    // unless this is a standalone document	    // warn ("reference to undeclared parameter entity", name, null);	    break;	case ENTITY_INTERNAL:	    if (inLiteral)		pushString (name, getEntityValue (name));	    else		pushString (name, " " + getEntityValue (name) + ' ');	    break;	case ENTITY_TEXT:	    if (!inLiteral)		pushString (null, " ");	    pushURL (name, getEntityPublicId (name),		     getEntitySystemId (name),		     null, null, null);	    if (!inLiteral)		pushString (null, " ");	    break;	}    }    /**     * Parse an entity declaration.     * <pre>     * [70] EntityDecl ::= GEDecl | PEDecl     * [71] GEDecl ::= '&lt;!ENTITY' S Name S EntityDef S? '&gt;'     * [72] PEDecl ::= '&lt;!ENTITY' S '%' S Name S PEDef S? '&gt;'     * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)     * [74] PEDef ::= EntityValue | ExternalID     * [75] ExternalID ::= 'SYSTEM' S SystemLiteral     *		   | 'PUBLIC' S PubidLiteral S SystemLiteral     * [76] NDataDecl ::= S 'NDATA' S Name     * </pre>     * <p>NOTE: the '&lt;!ENTITY' has already been read.     */    private void parseEntityDecl ()    throws Exception    {	char c;	boolean peFlag = false;	String name, value, notationName, ids[];	// Check for a parameter entity.	expandPE = false;	requireWhitespace ();	if (tryRead ('%')) {	    peFlag = true;	    requireWhitespace ();	}	expandPE = true;	// Read the entity name, and prepend	// '%' if necessary.	name = readNmtoken (true);	if (peFlag) {	    name = "%" + name;	}	// Read the entity value.	requireWhitespace ();	c = readCh ();	unread (c);	if (c == '"' || c == '\'') {	    // Internal entity ... replacement text has expanded refs	    // to characters and PEs, but not to general entities	    value = readLiteral (0);	    setInternalEntity (name, value);	} else {	    // Read the external IDs	    ids = readExternalIds (false);	    if (ids [1] == null) {		error ("system identifer missing", name, null);	    }	    // Check for NDATA declaration.	    boolean white = tryWhitespace ();	    if (!peFlag && tryRead ("NDATA")) {		if (!white)		    error ("whitespace required before NDATA");		requireWhitespace ();		notationName = readNmtoken (true);		setExternalDataEntity (name, ids [0], ids [1], notationName);	    } else {		setExternalTextEntity (name, ids [0], ids [1]);	    }	}	// Finish the declaration.	skipWhitespace ();	require ('>');    }    /**     * Parse a notation declaration.     * <pre>     * [82] NotationDecl ::= '&lt;!NOTATION' S Name S     *		(ExternalID | PublicID) S? '&gt;'     * [83] PublicID ::= 'PUBLIC' S PubidLiteral     * </pre>     * <P>NOTE: the '&lt;!NOTATION' has already been read.     */    private void parseNotationDecl ()    throws Exception    {	String nname, ids[];	requireWhitespace ();	nname = readNmtoken (true);	requireWhitespace ();	// Read the external identifiers.	ids = readExternalIds (true);	if (ids [0] == null && ids [1] == null) {	    error ("external identifer missing", nname, null);	}	// Register the notation.	setNotation (nname, ids [0], ids [1]);	skipWhitespace ();	require ('>');    }    /**     * Parse character data.     * <pre>     * [14] CharData ::= [^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)     * </pre>     */    private void parseCharData ()    throws Exception    {	char c;	// Start with a little cheat -- in most	// cases, the entire sequence of	// character data will already be in	// the readBuffer; if not, fall through to	// the normal approach.	if (USE_CHEATS) {	    int lineAugment = 0;	    int columnAugment = 0;loop:	    for (int i = readBufferPos; i < readBufferLength; i++) {		switch (c = readBuffer [i]) {		case '\n':		    lineAugment++;		    columnAugment = 0;		    break;		case '&':		case '<':		    int start = readBufferPos;		    columnAugment++;		    readBufferPos = i;		    if (lineAugment > 0) {			line += lineAugment;			column = columnAugment;		    } else {			column += columnAugment;		    }		    dataBufferAppend (readBuffer, start, i - start);		    return;		case ']':		    // XXX missing two end-of-buffer cases		    if ((i + 2) < readBufferLength) {			if (readBuffer [i + 1] == ']'				&& readBuffer [i + 2] == '>') {			    error ("character data may not contain ']]>'");			}		    }		    columnAugment++;		    break;		default:		    if (c < 0x0020 || c > 0xFFFD)			error ("illegal XML character U+"				+ Integer.toHexString (c));		    // FALLTHROUGH		case '\r':		case '\t':		    columnAugment++;		}	    }	}	// OK, the cheat didn't work; start over	// and do it by the book.	while (true) {	    c = readCh ();	    switch (c) {	    case '<':	    case '&':		unread (c);		return;	    // XXX "]]>" precluded ...	    default:		dataBufferAppend (c);		break;	    }	}    }    //////////////////////////////////////////////////////////////////////    // High-level reading and scanning methods.    //////////////////////////////////////////////////////////////////////    /**     * Require whitespace characters.     */    private void requireWhitespace ()    throws SAXException, IOException    {	char c = readCh ();	if (isWhitespace (c)) {	    skipWhitespace ();	} else {	    error ("whitespace required", c, null);	}    }    /**     * Parse whitespace characters, and leave them in the data buffer.     */    private void parseWhitespace ()    throws Exception    {	char c = readCh ();	while (isWhitespace (c)) {	    dataBufferAppend (c);	    c = readCh ();	}	unread (c);    }    /**     * Skip whitespace characters.     * <pre>     * [3] S ::= (#x20 | #x9 | #xd | #xa)+     * </pre>     */    private void skipWhitespace ()    throws SAXException, IOException    {	// Start with a little cheat.  Most of	// the time, the white space will fall	// within the current read buffer; if	// not, then fall through.	if (USE_CHEATS) {	    int lineAugment = 0;	    int columnAugment = 0;loop:	    for (int i = readBufferPos; i < readBufferLength; i++) {		switch (readBuffer [i]) {		case ' ':		case '\t':		case '\r':		    columnAugment++;		    break;		case '\n':		    lineAugment++;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -