📄 xmlparser.java

📁 好东西啊！你看看就知道了
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	expandPE = false;	parseUntil ("--");	require ('>');	expandPE = saved;	handler.comment (dataBuffer, 0, dataBufferPos);	dataBufferPos = 0;    }    /**     * Parse a processing instruction and do a call-back.     * <pre>     * [16] PI ::= '&lt;?' PITarget     *		(S (Char* - (Char* '?&gt;' Char*)))?     *		'?&gt;'     * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') )     * </pre>     * <p> (The <code>&lt;?</code> has already been read.)     */    private void parsePI ()    throws SAXException, IOException    {	String name;	boolean saved = expandPE;	expandPE = false;	name = readNmtoken (true);	if ("xml".equalsIgnoreCase (name))	    error ("Illegal processing instruction target", name, null);	if (!tryRead ("?>")) {	    requireWhitespace ();	    parseUntil ("?>");	}	expandPE = saved;	handler.processingInstruction (name, dataBufferToString ());    }    /**     * Parse a CDATA section.     * <pre>     * [18] CDSect ::= CDStart CData CDEnd     * [19] CDStart ::= '&lt;![CDATA['     * [20] CData ::= (Char* - (Char* ']]&gt;' Char*))     * [21] CDEnd ::= ']]&gt;'     * </pre>     * <p> (The '&lt;![CDATA[' has already been read.)     */    private void parseCDSect ()    throws Exception    {	parseUntil ("]]>");	dataBufferFlush ();    }    /**     * Parse the prolog of an XML document.     * <pre>     * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?     * </pre>     * <p>There are a couple of tricks here.  First, it is necessary to     * declare the XML default attributes after the DTD (if present)     * has been read. [??]  Second, it is not possible to expand general     * references in attribute value literals until after the entire     * DTD (if present) has been parsed.     * <p>We do not look for the XML declaration here, because it was     * handled by pushURL ().     * @see pushURL     */    private void parseProlog ()    throws Exception    {	parseMisc ();	if (tryRead ("<!DOCTYPE")) {	    parseDoctypedecl ();	    parseMisc ();	}    }    /**     * Parse the XML declaration.     * <pre>     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'     * [24] VersionInfo ::= S 'version' Eq     *		("'" VersionNum "'" | '"' VersionNum '"' )     * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*     * [32] SDDecl ::= S 'standalone' Eq     *		( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' )     * [80] EncodingDecl ::= S 'encoding' Eq     *		( "'" EncName "'" | "'" EncName "'" )     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*     * </pre>     * <p> (The <code>&lt;?xml</code> and whitespace have already been read.)     * @return the encoding in the declaration, uppercased; or null     * @see #parseTextDecl     * @see #setupDecoding     */    private String parseXMLDecl (boolean ignoreEncoding)    throws SAXException, IOException    {	String	version;	String	encodingName = null;	String	standalone = null;	boolean	white;	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;	// Read the version.	require ("version");	parseEq ();	version = readLiteral (flags);	if (!version.equals ("1.0")) {	    error ("unsupported XML version", version, "1.0");	}	// Try reading an encoding declaration.	white = tryWhitespace ();	if (tryRead ("encoding")) {	    if (!white)		error ("whitespace required before 'encoding='");	    parseEq ();	    encodingName = readLiteral (flags);	    if (!ignoreEncoding)		setupDecoding (encodingName);	}	// Try reading a standalone declaration	if (encodingName != null)	    white = tryWhitespace ();	if (tryRead ("standalone")) {	    if (!white)		error ("whitespace required before 'standalone='");	    parseEq ();	    standalone = readLiteral (flags);	    if (! ("yes".equals (standalone) || "no".equals (standalone)))		error ("standalone flag must be 'yes' or 'no'");	}	skipWhitespace ();	require ("?>");	return encodingName;    }    /**     * Parse a text declaration.     * <pre>     * [79] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'     * [80] EncodingDecl ::= S 'encoding' Eq     *		( '"' EncName '"' | "'" EncName "'" )     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*     * </pre>     * <p> (The <code>&lt;?xml</code>' and whitespace have already been read.)     * @return the encoding in the declaration, uppercased; or null     * @see #parseXMLDecl     * @see #setupDecoding     */    private String parseTextDecl (boolean ignoreEncoding)    throws SAXException, IOException    {	String	encodingName = null;	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;	// Read an optional version.	if (tryRead ("version")) {	    String version;	    parseEq ();	    version = readLiteral (flags);	    if (!version.equals ("1.0")) {		error ("unsupported XML version", version, "1.0");	    }	    requireWhitespace ();	}	// Read the encoding.	require ("encoding");	parseEq ();	encodingName = readLiteral (flags);	if (!ignoreEncoding)	    setupDecoding (encodingName);	skipWhitespace ();	require ("?>");	return encodingName;    }    /**     * Sets up internal state so that we can decode an entity using the     * specified encoding.  This is used when we start to read an entity     * and we have been given knowledge of its encoding before we start to     * read any data (e.g. from a SAX input source or from a MIME type).     *     * <p> It is also used after autodetection, at which point only very     * limited adjustments to the encoding may be used (switching between     * related builtin decoders).     *     * @param encodingName The name of the encoding specified by the user.     * @exception IOException if the encoding isn't supported either     *	internally to this parser, or by the hosting JVM.     * @see #parseXMLDecl     * @see #parseTextDecl     */    private void setupDecoding (String encodingName)    throws SAXException, IOException    {	encodingName = encodingName.toUpperCase ();	// ENCODING_EXTERNAL indicates an encoding that wasn't	// autodetected ... we can use builtin decoders, or	// ones from the JVM (InputStreamReader).	// Otherwise we can only tweak what was autodetected, and	// only for single byte (ASCII derived) builtin encodings.	// ASCII-derived encodings	if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) {	    if (encodingName.equals ("ISO-8859-1")		    || encodingName.equals ("8859_1")		    || encodingName.equals ("ISO8859_1")	      ) {		encoding = ENCODING_ISO_8859_1;		return;	    } else if (encodingName.equals ("US-ASCII")			|| encodingName.equals ("ASCII")) {		encoding = ENCODING_ASCII;		return;	    } else if (encodingName.equals ("UTF-8")			|| encodingName.equals ("UTF8")) {		encoding = ENCODING_UTF_8;		return;	    } else if (encoding != ENCODING_EXTERNAL) {		// fatal error		error ("unsupported ASCII-derived encoding",		       encodingName,		       "UTF-8, US-ASCII, or ISO-8859-1");	    }	    // else fallthrough ...	    // it's ASCII-ish and something other than a builtin	}	// Unicode and such	if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) {	    if (!(encodingName.equals ("ISO-10646-UCS-2")		    || encodingName.equals ("UTF-16")		    || encodingName.equals ("UTF-16BE")		    || encodingName.equals ("UTF-16LE")))		error ("unsupported Unicode encoding",		       encodingName,		       "UTF-16");	    return;	}	// four byte encodings	if (encoding == ENCODING_UCS_4_1234		|| encoding == ENCODING_UCS_4_4321		|| encoding == ENCODING_UCS_4_2143		|| encoding == ENCODING_UCS_4_3412) {	    if (!encodingName.equals ("ISO-10646-UCS-4"))		error ("unsupported 32-bit encoding",		       encodingName,		       "ISO-10646-UCS-4");	    return;	}	// assert encoding == ENCODING_EXTERNAL	// if (encoding != ENCODING_EXTERNAL)	//     throw new RuntimeException ("encoding = " + encoding);	if (encodingName.equals ("UTF-16BE")) {	    encoding = ENCODING_UCS_2_12;	    return;	}	if (encodingName.equals ("UTF-16LE")) {	    encoding = ENCODING_UCS_2_21;	    return;	}	// We couldn't use the builtin decoders at all.  But we can try to	// create a reader, since we haven't messed up buffering.  Tweak	// the encoding name if necessary.	if (encodingName.equals ("UTF-16")		|| encodingName.equals ("ISO-10646-UCS-2"))	    encodingName = "Unicode";	// Ignoring all the EBCDIC aliases here	reader = new InputStreamReader (is, encodingName);	sourceType = INPUT_READER;	is = null;    }    /**     * Parse miscellaneous markup outside the document element and DOCTYPE     * declaration.     * <pre>     * [27] Misc ::= Comment | PI | S     * </pre>     */    private void parseMisc ()    throws Exception    {	while (true) {	    skipWhitespace ();	    if (tryRead ("<?")) {		parsePI ();	    } else if (tryRead ("<!--")) {		parseComment ();	    } else {		return;	    }	}    }    /**     * Parse a document type declaration.     * <pre>     * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?     *		('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'     * </pre>     * <p> (The <code>&lt;!DOCTYPE</code> has already been read.)     */    private void parseDoctypedecl ()    throws Exception    {	char c;	String doctypeName, ids[];	// Read the document type name.	requireWhitespace ();	doctypeName = readNmtoken (true);	// Read the External subset's IDs	skipWhitespace ();	ids = readExternalIds (false);	// report (a) declaration of name, (b) lexical info (ids)	handler.doctypeDecl (doctypeName, ids [0], ids [1]);	// Internal subset is parsed first, if present	skipWhitespace ();	if (tryRead ('[')) {	    // loop until the subset ends	    while (true) {		expandPE = true;		skipWhitespace ();		expandPE = false;		if (tryRead (']')) {		    break; 		// end of subset		} else {		    // WFC, PEs in internal subset (only between decls)		    peIsError = expandPE = true;		    parseMarkupdecl ();		    peIsError = expandPE = false;		}	    }	}	// Read the external subset, if any	if (ids [1] != null) {	    pushURL ("[external subset]", ids [0], ids [1], null, null, null);	    // Loop until we end up back at '>'	    while (true) {		expandPE = true;		skipWhitespace ();		expandPE = false;		if (tryRead ('>')) {		    break;		} else {		    expandPE = true;		    parseMarkupdecl ();		    expandPE = false;		}	    }	} else {	    // No external subset.	    skipWhitespace ();	    require ('>');	}	// done dtd	handler.endDoctype ();	expandPE = false;    }    /**     * Parse a markup declaration in the internal or external DTD subset.     * <pre>     * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl     *		| NotationDecl | PI | Comment     * [30] extSubsetDecl ::= (markupdecl | conditionalSect     *		| PEReference | S) *     * </pre>     * <p> Reading toplevel PE references is handled as a lexical issue     * by the caller, as is whitespace.     */    private void parseMarkupdecl ()    throws Exception    {	if (tryRead ("<!ELEMENT")) {	    parseElementdecl ();	} else if (tryRead ("<!ATTLIST")) {	    parseAttlistDecl ();	} else if (tryRead ("<!ENTITY")) {	    parseEntityDecl ();	} else if (tryRead ("<!NOTATION")) {	    parseNotationDecl ();	} else if (tryRead ("<?")) {	    parsePI ();	} else if (tryRead ("<!--")) {	    parseComment ();	} else if (tryRead ("<![")) {	    if (inputStack.size () > 0)		parseConditionalSect ();	    else		error ("conditional sections illegal in internal subset");	} else {	    error ("expected markup declaration");	}    }    /**     * Parse an element, with its tags.     * <pre>     * [39] element ::= EmptyElementTag | STag content ETag     * [40] STag ::= '&lt;' Name (S Attribute)* S? '&gt;'     * [44] EmptyElementTag ::= '&lt;' Name (S Attribute)* S? '/&gt;'     * </pre>     * <p> (The '&lt;' has already been read.)     * <p>NOTE: this method actually chains onto parseContent (), if necessary,     * and parseContent () will take care of calling parseETag ().     */    private void parseElement ()    throws Exception    {	String	gi;	char	c;	int	oldElementContent = currentElementContent;	String	oldElement = currentElement;	Object	element [];	// This is the (global) counter for the	// array of specified attributes.	tagAttributePos = 0;	// Read the element type name.	gi = readNmtoken (true);	// Determine the current content type.	currentElement = gi;	element = (Object []) elementInfo.get (gi);	currentElementContent = getContentType (element, CONTENT_ANY);	// Read the attributes, if any.	// After this loop, "c" is the closing delimiter.	boolean white = tryWhitespace ();	c = readCh ();	while (c != '/' && c != '>') {	    unread (c);	    if (!white)		error ("need whitespace between attributes");	    parseAttribute (gi);	    white = tryWhitespace ();	    c = readCh ();	}	// Supply any defaulted attributes.	Enumeration atts = declaredAttributes (element);	if (atts != null) {	    String aname;loop:	    while (atts.hasMoreElements ()) {		aname = (String) atts.nextElement ();		// See if it was specified.		for (int i = 0; i < tagAttributePos; i++) {		    if (tagAttributes [i] == aname) {			continue loop;		    }		}		// I guess not...		handler.attribute (aname,				   getAttributeExpandedValue (gi, aname),				   false);	    }	}	// Figure out if this is a start tag	// or an empty element, and dispatch an	// event accordingly.	switch (c) {	case '>':	    handler.startElement (gi);	    parseContent ();	    break;	case '/':	    require ('>');	    handler.startElement (gi);	    handler.endElement (gi);	    break;	}	// Restore the previous state.	currentElement = oldElement;	currentElementContent = oldElementContent;    }    /**     * Parse an attribute assignment.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -