📄 xmlparser.java

📁 开源的java 编辑器源代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    name = readNmtoken(true);    if (!tryRead("?>")) {      requireWhitespace();      parseUntil("?>");    }    if (handler != null) {      handler.processingInstruction(name, dataBufferToString());    }  }  /**    * Parse a CDATA marked section.    * <pre>    * [20] CDSect ::= CDStart CData CDEnd    * [21] CDStart ::= '&lt;![CDATA['    * [22] CData ::= (Char* - (Char* ']]&gt;' Char*))    * [23] CDEnd ::= ']]&gt;'    * </pre>    * <p>(The '&lt;![CDATA[' has already been read.)    * <p>Note that this just appends characters to the dataBuffer,    * without actually generating an event.    */  void parseCDSect ()    throws java.lang.Exception  {    parseUntil("]]>");  }  /**    * Parse the prolog of an XML document.    * <pre>    * [24] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?    * </pre>    * <p>There are a couple of tricks here.  First, it is necessary to    * declare the XML default attributes after the DTD (if present)    * has been read.  Second, it is not possible to expand general    * references in attribute value literals until after the entire    * DTD (if present) has been parsed.    * <p>We do not look for the XML declaration here, because it is    * handled by pushURL().    * @see pushURL    */  void parseProlog ()    throws java.lang.Exception  {    parseMisc();    if (tryRead("<!DOCTYPE")) {      parseDoctypedecl();      parseMisc();    }  }  /**    * Parse the XML declaration.    * <pre>    * [25] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'    * [26] VersionInfo ::= S 'version' Eq ('"1.0"' | "'1.0'")    * [33] SDDecl ::= S 'standalone' Eq "'" ('yes' | 'no') "'"    *               | S 'standalone' Eq '"' ("yes" | "no") '"'    * [78] EncodingDecl ::= S 'encoding' Eq QEncoding    * </pre>    * <p>([80] to [82] are also significant.)    * <p>(The <code>&lt;?xml</code> and whitespace have already been read.)    * <p>TODO: validate value of standalone.    * @see #parseTextDecl    * @see #checkEncoding    */  void parseXMLDecl (boolean ignoreEncoding)    throws java.lang.Exception  {    String version;    String encodingName = null;    String standalone = null;				// Read the version.    require("version");    parseEq();    version = readLiteral(0);    if (!version.equals("1.0")) {      error("unsupported XML version", version, "1.0");    }				// Try reading an encoding declaration.    skipWhitespace();    if (tryRead("encoding")) {      parseEq();      encodingName = readLiteral(0);      checkEncoding(encodingName, ignoreEncoding);    }				// Try reading a standalone declaration    skipWhitespace();    if (tryRead("standalone")) {      parseEq();      standalone = readLiteral(0);    }    skipWhitespace();    require("?>");  }  /**    * Parse the Encoding PI.    * <pre>    * [78] EncodingDecl ::= S 'encoding' Eq QEncoding    * [79] EncodingPI ::= '&lt;?xml' S 'encoding' Eq QEncoding S? '?&gt;'    * [80] QEncoding ::= '"' Encoding '"' | "'" Encoding "'"    * [81] Encoding ::= LatinName    * [82] LatinName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*    * </pre>    * <p>(The <code>&lt;?xml</code>' and whitespace have already been read.)    * @see #parseXMLDecl    * @see #checkEncoding    */  void parseTextDecl (boolean ignoreEncoding)    throws java.lang.Exception  {    String encodingName = null;    				// Read an optional version.    if (tryRead("version")) {      String version;      parseEq();      version = readLiteral(0);      if (!version.equals("1.0")) {	error("unsupported XML version", version, "1.0");      }      requireWhitespace();    }      				// Read the encoding.    require("encoding");    parseEq();    encodingName = readLiteral(0);    checkEncoding(encodingName, ignoreEncoding);    skipWhitespace();    require("?>");  }  /**    * Check that the encoding specified makes sense.    * <p>Compare what the author has specified in the XML declaration    * or encoding PI with what we have detected.    * <p>This is also important for distinguishing among the various    * 7- and 8-bit encodings, such as ISO-LATIN-1 (I cannot autodetect    * those).    * @param encodingName The name of the encoding specified by the user.    * @see #parseXMLDecl    * @see #parseTextDecl    */  void checkEncoding (String encodingName, boolean ignoreEncoding)    throws java.lang.Exception  {    encodingName = encodingName.toUpperCase();    if (ignoreEncoding) {      return;    }    switch (encoding) {				// 8-bit encodings    case ENCODING_UTF_8:      if (encodingName.equals("ISO-8859-1")) {	encoding = ENCODING_ISO_8859_1;      } else if (!encodingName.equals("UTF-8")) {	error("unsupported 8-bit encoding",	      encodingName,	      "UTF-8 or ISO-8859-1");      }      break;				// 16-bit encodings    case ENCODING_UCS_2_12:    case ENCODING_UCS_2_21:      if (!encodingName.equals("ISO-10646-UCS-2") &&	  !encodingName.equals("UTF-16")) {	error("unsupported 16-bit encoding",	      encodingName,	      "ISO-10646-UCS-2");      }      break;				// 32-bit encodings    case ENCODING_UCS_4_1234:    case ENCODING_UCS_4_4321:    case ENCODING_UCS_4_2143:    case ENCODING_UCS_4_3412:      if (!encodingName.equals("ISO-10646-UCS-4")) {	error("unsupported 32-bit encoding",	      encodingName,	      "ISO-10646-UCS-4");      }    }  }  /**    * Parse miscellaneous markup outside the document element and DOCTYPE    * declaration.    * <pre>    * [27] Misc ::= Comment | PI | S    * </pre>    */  void parseMisc ()    throws java.lang.Exception    {    while (true)      {      skipWhitespace();      if (tryRead("<?"))        {parsePI();}      else if (tryRead("<!--"))        {parseComment();}      else        {return;}      }    }  /**    * Parse a document type declaration.    * <pre>    * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?    *                      ('[' %markupdecl* ']' S?)? '&gt;'    * </pre>    * <p>(The <code>&lt;!DOCTYPE</code> has already been read.)    */  void parseDoctypedecl ()    throws java.lang.Exception  {    char c;    String doctypeName, ids[];				// Read the document type name.    requireWhitespace();    doctypeName = readNmtoken(true);				// Read the ExternalIDs.    skipWhitespace();    ids = readExternalIds(false);				// Look for a declaration subset.    skipWhitespace();    if (tryRead('[')) {				// loop until the subset ends      while (true) {	context = CONTEXT_DTD;	skipWhitespace();	context = CONTEXT_NONE;	if (tryRead(']')) {	  break;		// end of subset	} else {	  context = CONTEXT_DTD;	  parseMarkupdecl();	  context = CONTEXT_NONE;	}      }    }				// Read the external subset, if any    if (ids[1] != null) {      pushURL("[external subset]", ids[0], ids[1], null, null, null);				// Loop until we end up back at '>'      while (true) {	context = CONTEXT_DTD;	skipWhitespace();	context = CONTEXT_NONE;	if (tryRead('>')) {	  break;	} else {	  context = CONTEXT_DTD;	  parseMarkupdecl();	  context = CONTEXT_NONE;	}      }    } else {				// No external subset.      skipWhitespace();      require('>');    }    if (handler != null) {      handler.doctypeDecl(doctypeName, ids[0], ids[1]);    }				// Expand general entities in				// default values of attributes.				// (Do this after the doctypeDecl				// event!).    // expandAttributeDefaultValues();  }  /**    * Parse a markup declaration in the internal or external DTD subset.    * <pre>    * [29] markupdecl ::= ( %elementdecl | %AttlistDecl | %EntityDecl |    *                       %NotationDecl | %PI | %S | %Comment |    *                       InternalPERef )    * [30] InternalPERef ::= PEReference    * [31] extSubset ::= (%markupdecl | %conditionalSect)*    * </pre>    */  void parseMarkupdecl ()    throws java.lang.Exception  {    if (tryRead("<!ELEMENT")) {      parseElementdecl();    } else if (tryRead("<!ATTLIST")) {      parseAttlistDecl();    } else if (tryRead("<!ENTITY")) {      parseEntityDecl();    } else if (tryRead("<!NOTATION")) {      parseNotationDecl();    } else if (tryRead("<?")) {      parsePI();    } else if (tryRead("<!--")) {      parseComment();    } else if (tryRead("<![")) {      parseConditionalSect();    } else {      error("expected markup declaration", null, null);    }  }  /**    * Parse an element, with its tags.    * <pre>    * [33] STag ::= '&lt;' Name (S Attribute)* S? '&gt;' [WFC: unique Att spec]    * [38] element ::= EmptyElement | STag content ETag    * [39] EmptyElement ::= '&lt;' Name (S Attribute)* S? '/&gt;'    *                       [WFC: unique Att spec]    * </pre>    * <p>(The '&lt;' has already been read.)    * <p>NOTE: this method actually chains onto parseContent(), if necessary,    * and parseContent() will take care of calling parseETag().    */  void parseElement ()    throws java.lang.Exception  {    String gi;    char c;    int oldElementContent = currentElementContent;    String oldElement = currentElement;				// This is the (global) counter for the				// array of specified attributes.    tagAttributePos = 0;				// Read the element type name.    gi = readNmtoken(true);				// Determine the current content type.    currentElement = gi;    currentElementContent = getElementContentType(gi);    if (currentElementContent == CONTENT_UNDECLARED) {      currentElementContent = CONTENT_ANY;    }				// Read the attributes, if any.				// After this loop, we should be just				// in front of the closing delimiter.    skipWhitespace();    c = readCh();    while (c != '/' && c != '>') {      unread(c);      parseAttribute(gi);      skipWhitespace();      c = readCh();    }    unread(c);				// Supply any defaulted attributes.    Enumeration atts = declaredAttributes(gi);    if (atts != null) {      String aname;    loop: while (atts.hasMoreElements()) {      aname = (String)atts.nextElement();				// See if it was specified.      for (int i = 0; i < tagAttributePos; i++) {	if (tagAttributes[i] == aname) {	  continue loop;	}      }				// I guess not...      if (handler != null) {	handler.attribute(aname,			  getAttributeExpandedValue(gi, aname),			  false);      }    }    }				// Figure out if this is a start tag				// or an empty element, and dispatch an				// event accordingly.    c = readCh();    switch (c) {    case '>':      if (handler != null) {	handler.startElement(gi);      }      parseContent();      break;    case '/':      require('>');      if (handler != null) {	handler.startElement(gi);	handler.endElement(gi);      }      break;    }				// Restore the previous state.    currentElement = oldElement;    currentElementContent = oldElementContent;  }  /**    * Parse an attribute assignment.    * <pre>    * [34] Attribute ::= Name Eq AttValue    * </pre>    * @param name The name of the attribute's element.    * @see XmlHandler#attribute    */  void parseAttribute (String name)    throws java.lang.Exception  {    String aname;    int type;    String value;				// Read the attribute name.    aname = readNmtoken(true).intern();    type = getAttributeDefaultValueType(name, aname);				// Parse '='    parseEq();				// Read the value, normalizing whitespace				// if it is not CDATA.    if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) {      value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF);    } else {      value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF | LIT_NORMALIZE);    }				// Inform the handler about the				// attribute.    if (handler != null) {      handler.attribute(aname, value, true);    }    dataBufferPos = 0;				// Note that the attribute has been				// specified.    if (tagAttributePos == tagAttributes.length) {      String newAttrib[] = new String[tagAttributes.length * 2];      System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos);      tagAttributes = newAttrib;    }    tagAttributes[tagAttributePos++] = aname;  }  /**    * Parse an equals sign surrounded by optional whitespace.    * [35] Eq ::= S? '=' S?    */  void parseEq ()    throws java.lang.Exception  {    skipWhitespace();    require('=');    skipWhitespace();  }  /**    * Parse an end tag.    * [36] ETag ::= '</' Name S? '>'    * *NOTE: parseContent() chains to here.    */  void parseETag ()    throws java.lang.Exception  {    String name;    name = readNmtoken(true);    if (name != currentElement) {      error("mismatched end tag", name, currentElement);    }    skipWhitespace();    require('>');    if (handler != null) {      handler.endElement(name);    }  }  /**    * Parse the content of an element.    * [37] content ::= (element | PCData | Reference | CDSect | PI | Comment)*    * [68] Reference ::= EntityRef | CharRef    */  void parseContent ()    throws java.lang.Exception  {    String data;    char c;    while (true) {      switch (currentElementContent) {      case CONTENT_ANY:      case CONTENT_MIXED:	parsePCData();	break;      case CONTENT_ELEMENTS:	parseWhitespace();	break;      }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -