📄 xmlparser.java

📁 An open_source WAP browser. include Java code. support WML documents and WBMP images.
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    skipUntil("-->");
  }


  /**
    * Parse a processing instruction and do a call-back.
    * <pre>
    * [19] PI ::= '&lt;?' Name (S (Char* - (Char* '?&gt;' Char*)))? '?&gt;'
    * </pre>
    * <p>(The <code>&lt;?</code> has already been read.)
    * <p>An XML processing instruction <em>must</em> begin with
    * a Name, which is the instruction's target.
    */
  void parsePI ()
    throws java.lang.Exception
  {
    String name;

    name = readNmtoken(true);
    if (!tryRead("?>")) {
      requireWhitespace();
      parseUntil("?>");
    }
    if (handler != null) {
      handler.processingInstruction(name, dataBufferToString());
    }
  }


  /**
    * Parse a CDATA marked section.
    * <pre>
    * [20] CDSect ::= CDStart CData CDEnd
    * [21] CDStart ::= '&lt;![CDATA['
    * [22] CData ::= (Char* - (Char* ']]&gt;' Char*))
    * [23] CDEnd ::= ']]&gt;'
    * </pre>
    * <p>(The '&lt;![CDATA[' has already been read.)
    * <p>Note that this just appends characters to the dataBuffer,
    * without actually generating an event.
    */
  void parseCDSect ()
    throws java.lang.Exception
  {
    parseUntil("]]>");
  }


  /**
    * Parse the prolog of an XML document.
    * <pre>
    * [24] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
    * </pre>
    * <p>There are a couple of tricks here.  First, it is necessary to
    * declare the XML default attributes after the DTD (if present)
    * has been read.  Second, it is not possible to expand general
    * references in attribute value literals until after the entire
    * DTD (if present) has been parsed.
    * <p>We do not look for the XML declaration here, because it is
    * handled by pushURL().
    * @see pushURL
    */
  void parseProlog ()
    throws java.lang.Exception
  {
    parseMisc();

    if (tryRead("<!DOCTYPE")) {
      parseDoctypedecl();
      parseMisc();
    }
  }


  /**
    * Parse the XML declaration.
    * <pre>
    * [25] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
    * [26] VersionInfo ::= S 'version' Eq ('"1.0"' | "'1.0'")
    * [33] SDDecl ::= S 'standalone' Eq "'" ('yes' | 'no') "'"
    *               | S 'standalone' Eq '"' ("yes" | "no") '"'
    * [78] EncodingDecl ::= S 'encoding' Eq QEncoding
    * </pre>
    * <p>([80] to [82] are also significant.)
    * <p>(The <code>&lt;?xml</code> and whitespace have already been read.)
    * <p>TODO: validate value of standalone.
    * @see #parseTextDecl
    * @see #checkEncoding
    */
  void parseXMLDecl (boolean ignoreEncoding)
    throws java.lang.Exception
  {
    String version;
    String encodingName = null;
    String standalone = null;

				// Read the version.
    require("version");
    parseEq();
    version = readLiteral(0);
    if (!version.equals("1.0")) {
      error("unsupported XML version", version, "1.0");
    }

				// Try reading an encoding declaration.
    skipWhitespace();
    if (tryRead("encoding")) {
      parseEq();
      encodingName = readLiteral(0);
      checkEncoding(encodingName, ignoreEncoding);
    }

				// Try reading a standalone declaration
    skipWhitespace();
    if (tryRead("standalone")) {
      parseEq();
      standalone = readLiteral(0);
    }

    skipWhitespace();
    require("?>");
  }


  /**
    * Parse the Encoding PI.
    * <pre>
    * [78] EncodingDecl ::= S 'encoding' Eq QEncoding
    * [79] EncodingPI ::= '&lt;?xml' S 'encoding' Eq QEncoding S? '?&gt;'
    * [80] QEncoding ::= '"' Encoding '"' | "'" Encoding "'"
    * [81] Encoding ::= LatinName
    * [82] LatinName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
    * </pre>
    * <p>(The <code>&lt;?xml</code>' and whitespace have already been read.)
    * @see #parseXMLDecl
    * @see #checkEncoding
    */
  void parseTextDecl (boolean ignoreEncoding)
    throws java.lang.Exception
  {
    String encodingName = null;
    
				// Read an optional version.
    if (tryRead("version")) {
      String version;
      parseEq();
      version = readLiteral(0);
      if (!version.equals("1.0")) {
	error("unsupported XML version", version, "1.0");
      }
      requireWhitespace();
    }
      

				// Read the encoding.
    require("encoding");
    parseEq();
    encodingName = readLiteral(0);
    checkEncoding(encodingName, ignoreEncoding);

    skipWhitespace();
    require("?>");
  }


  /**
    * Check that the encoding specified makes sense.
    * <p>Compare what the author has specified in the XML declaration
    * or encoding PI with what we have detected.
    * <p>This is also important for distinguishing among the various
    * 7- and 8-bit encodings, such as ISO-LATIN-1 (I cannot autodetect
    * those).
    * @param encodingName The name of the encoding specified by the user.
    * @see #parseXMLDecl
    * @see #parseTextDecl
    */
  void checkEncoding (String encodingName, boolean ignoreEncoding)
    throws java.lang.Exception
  {
    encodingName = encodingName.toUpperCase();

    if (ignoreEncoding) {
      return;
    }

    switch (encoding) {
				// 8-bit encodings
    case ENCODING_UTF_8:
      if (encodingName.equals("ISO-8859-1")) {
	encoding = ENCODING_ISO_8859_1;
      } else if (!encodingName.equals("UTF-8")) {
	error("unsupported 8-bit encoding",
	      encodingName,
	      "UTF-8 or ISO-8859-1");
      }
      break;
				// 16-bit encodings
    case ENCODING_UCS_2_12:
    case ENCODING_UCS_2_21:
      if (!encodingName.equals("ISO-10646-UCS-2") &&
	  !encodingName.equals("UTF-16")) {
	error("unsupported 16-bit encoding",
	      encodingName,
	      "ISO-10646-UCS-2");
      }
      break;
				// 32-bit encodings
    case ENCODING_UCS_4_1234:
    case ENCODING_UCS_4_4321:
    case ENCODING_UCS_4_2143:
    case ENCODING_UCS_4_3412:
      if (!encodingName.equals("ISO-10646-UCS-4")) {
	error("unsupported 32-bit encoding",
	      encodingName,
	      "ISO-10646-UCS-4");
      }
    }
  }


  /**
    * Parse miscellaneous markup outside the document element and DOCTYPE
    * declaration.
    * <pre>
    * [27] Misc ::= Comment | PI | S
    * </pre>
    */
  void parseMisc ()
    throws java.lang.Exception
    {
    while (true)
      {
      skipWhitespace();
      if (tryRead("<?"))
        {parsePI();}
      else if (tryRead("<!--"))
        {parseComment();}
      else
        {return;}
      }
    }


  /**
    * Parse a document type declaration.
    * <pre>
    * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
    *                      ('[' %markupdecl* ']' S?)? '&gt;'
    * </pre>
    * <p>(The <code>&lt;!DOCTYPE</code> has already been read.)
    */
  void parseDoctypedecl ()
    throws java.lang.Exception
  {
    char c;
    String doctypeName, ids[];

				// Read the document type name.
    requireWhitespace();
    doctypeName = readNmtoken(true);

				// Read the ExternalIDs.
    skipWhitespace();
    ids = readExternalIds(false);

				// Look for a declaration subset.
    skipWhitespace();
    if (tryRead('[')) {

				// loop until the subset ends
      while (true) {
	context = CONTEXT_DTD;
	skipWhitespace();
	context = CONTEXT_NONE;
	if (tryRead(']')) {
	  break;		// end of subset
	} else {
	  context = CONTEXT_DTD;
	  parseMarkupdecl();
	  context = CONTEXT_NONE;
	}
      }
    }

				// Read the external subset, if any
    if (ids[1] != null) {
      pushURL("[external subset]", ids[0], ids[1], null, null, null);

				// Loop until we end up back at '>'
      while (true) {
	context = CONTEXT_DTD;
	skipWhitespace();
	context = CONTEXT_NONE;
	if (tryRead('>')) {
	  break;
	} else {
	  context = CONTEXT_DTD;
	  parseMarkupdecl();
	  context = CONTEXT_NONE;
	}
      }
    } else {
				// No external subset.
      skipWhitespace();
      require('>');
    }

    if (handler != null) {
      handler.doctypeDecl(doctypeName, ids[0], ids[1]);
    }

				// Expand general entities in
				// default values of attributes.
				// (Do this after the doctypeDecl
				// event!).
    // expandAttributeDefaultValues();
  }


  /**
    * Parse a markup declaration in the internal or external DTD subset.
    * <pre>
    * [29] markupdecl ::= ( %elementdecl | %AttlistDecl | %EntityDecl |
    *                       %NotationDecl | %PI | %S | %Comment |
    *                       InternalPERef )
    * [30] InternalPERef ::= PEReference
    * [31] extSubset ::= (%markupdecl | %conditionalSect)*
    * </pre>
    */
  void parseMarkupdecl ()
    throws java.lang.Exception
  {
    if (tryRead("<!ELEMENT")) {
      parseElementdecl();
    } else if (tryRead("<!ATTLIST")) {
      parseAttlistDecl();
    } else if (tryRead("<!ENTITY")) {
      parseEntityDecl();
    } else if (tryRead("<!NOTATION")) {
      parseNotationDecl();
    } else if (tryRead("<?")) {
      parsePI();
    } else if (tryRead("<!--")) {
      parseComment();
    } else if (tryRead("<![")) {
      parseConditionalSect();
    } else {
      error("expected markup declaration", null, null);
    }
  }


  /**
    * Parse an element, with its tags.
    * <pre>
    * [33] STag ::= '&lt;' Name (S Attribute)* S? '&gt;' [WFC: unique Att spec]
    * [38] element ::= EmptyElement | STag content ETag
    * [39] EmptyElement ::= '&lt;' Name (S Attribute)* S? '/&gt;'
    *                       [WFC: unique Att spec]
    * </pre>
    * <p>(The '&lt;' has already been read.)
    * <p>NOTE: this method actually chains onto parseContent(), if necessary,
    * and parseContent() will take care of calling parseETag().
    */
  void parseElement ()
    throws java.lang.Exception
  {
    String gi;
    char c;
    int oldElementContent = currentElementContent;
    String oldElement = currentElement;

				// This is the (global) counter for the
				// array of specified attributes.
    tagAttributePos = 0;

				// Read the element type name.
    gi = readNmtoken(true);

				// Determine the current content type.
    currentElement = gi;
    currentElementContent = getElementContentType(gi);
    if (currentElementContent == CONTENT_UNDECLARED) {
      currentElementContent = CONTENT_ANY;
    }

				// Read the attributes, if any.
				// After this loop, we should be just
				// in front of the closing delimiter.
    skipWhitespace();
    c = readCh();
    while (c != '/' && c != '>') {
      unread(c);
      parseAttribute(gi);
      skipWhitespace();
      c = readCh();
    }
    unread(c);

				// Supply any defaulted attributes.
    Enumeration atts = declaredAttributes(gi);
    if (atts != null) {
      String aname;
    loop: while (atts.hasMoreElements()) {
      aname = (String)atts.nextElement();
				// See if it was specified.
      for (int i = 0; i < tagAttributePos; i++) {
	if (tagAttributes[i] == aname) {
	  continue loop;
	}
      }
				// I guess not...
      if (handler != null) {
	handler.attribute(aname,
			  getAttributeExpandedValue(gi, aname),
			  false);
      }
    }
    }

				// Figure out if this is a start tag
				// or an empty element, and dispatch an
				// event accordingly.
    c = readCh();
    switch (c) {
    case '>':
      if (handler != null) {
	handler.startElement(gi);
      }
      parseContent();
      break;
    case '/':
      require('>');
      if (handler != null) {
	handler.startElement(gi);
	handler.endElement(gi);
      }
      break;
    }

				// Restore the previous state.
    currentElement = oldElement;
    currentElementContent = oldElementContent;
  }


  /**
    * Parse an attribute assignment.
    * <pre>
    * [34] Attribute ::= Name Eq AttValue
    * </pre>
    * @param name The name of the attribute's element.
    * @see XmlHandler#attribute
    */
  void parseAttribute (String name)
    throws java.lang.Exception
  {
    String aname;
    int type;
    String value;

				// Read the attribute name.
    aname = readNmtoken(true).intern();
    type = getAttributeDefaultValueType(name, aname);

				// Parse '='
    parseEq();

				// Read the value, normalizing whitespace
				// if it is not CDATA.
    if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) {
      value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF);
    } else {
      value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF | LIT_NORMALIZE);
    }

				// Inform the handler about the
				// attribute.
    if (handler != null) {
      handler.attribute(aname, value, true);
    }
    dataBufferPos = 0;

				// Note that the attribute has been
				// specified.
    if (tagAttributePos == tagAttributes.length) {
      String newAttrib[] = new String[tagAttributes.length * 2];
      System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos);
      tagAttributes = newAttrib;
    }
    tagAttributes[tagAttributePos++] = aname;
  }


  /**
    * Parse an equals sign surrounded by optional whitespace.
    * [35] Eq ::= S? '=' S?
    */
  void parseEq ()
    throws java.lang.Exception
  {
    skipWhitespace();
    require('=');
    skipWhitespace();
  }


  /**
    * Parse an end tag.
    * [36] ETag ::= '</' Name S? '>'
    * *NOTE: parseContent() chains to here.
    */
  void parseETag ()
    throws java.lang.Exception
  {
    String name;
    name = readNmtoken(true);
💿 文件大小 1087 K
👤 上传用户 chengshengwu123
📂 所属分类手机WAP编程
🏷️ 相关标签

#open_source #documents #browser #include
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -