📄 xmlparser.java

📁 An open-source WAP browser, including Java code, that supports WML documents and WBMP images.
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    if (name != currentElement) {
      error("mismatched end tag", name, currentElement);
    }
    skipWhitespace();
    require('>');
    if (handler != null) {
      handler.endElement(name);
    }
  }


  /**
    * Parse the content of the current element, through its end tag.
    * [37] content ::= (element | PCData | Reference | CDSect | PI | Comment)*
    * [68] Reference ::= EntityRef | CharRef
    * Loops over the content until "&lt;/" is seen; the end tag is then handed
    * to parseETag() and this method returns.
    */
  void parseContent() throws java.lang.Exception {
    char c;

    while (true) {
      // Consume leading text according to the current element's content
      // type; any other content type consumes nothing at this point.
      if (currentElementContent == CONTENT_ANY
          || currentElementContent == CONTENT_MIXED) {
        parsePCData();
      } else if (currentElementContent == CONTENT_ELEMENTS) {
        parseWhitespace();
      }

      // Look at the next delimiter.
      c = readCh();

      if (c == '&') {
        // "&#" introduces a character reference; anything else after
        // "&" is treated as an entity reference.
        c = readCh();
        if (c == '#') {
          parseCharRef();
        } else {
          unread(c);
          parseEntityRef(true);
        }
        continue;
      }

      if (c != '<') {
        // Neither "&" nor "<": nothing to dispatch on, go around again.
        continue;
      }

      // Found "<": the following character selects the markup kind.
      c = readCh();
      if (c == '!') {
        // Found "<!": only a comment or a CDATA section is allowed here.
        c = readCh();
        if (c == '-') {
          require('-');
          parseComment();
        } else if (c == '[') {
          require("CDATA[");
          parseCDSect();
        } else {
          error("expected comment or CDATA section", c, null);
        }
      } else if (c == '?') {
        // Found "<?": processing instruction.
        dataBufferFlush();
        parsePI();
      } else if (c == '/') {
        // Found "</": end tag closes this content.
        dataBufferFlush();
        parseETag();
        return;
      } else {
        // Found "<" followed by something else: a child element.
        dataBufferFlush();
        unread(c);
        parseElement();
      }
    }
  }


  /**
    * Parse an element type declaration.
    * [40] elementdecl ::= '<!ELEMENT' S %Name S (%S S)? %contentspec S? '>'
    *                      [VC: Unique Element Declaration]
    * The element name is read first and then passed to parseContentspec(),
    * which reads the content model.
    * *NOTE: the '<!ELEMENT' has already been read.
    */
  void parseElementdecl() throws java.lang.Exception {
    // Whitespace is mandatory before the element type name.
    requireWhitespace();
    final String elementName = readNmtoken(true);

    // Whitespace is mandatory again before the content model.
    requireWhitespace();
    parseContentspec(elementName);

    // Optional whitespace, then the closing delimiter.
    skipWhitespace();
    require('>');
  }


  /**
    * Parse a content specification and record it via setElement().
    * [41] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
    * For Mixed and elements, the text of the model is accumulated in the
    * data buffer and passed to setElement() as a string.
    */
  void parseContentspec(String name) throws java.lang.Exception {
    // Keyword forms carry no content model string.
    if (tryRead("EMPTY")) {
      setElement(name, CONTENT_EMPTY, null, null);
      return;
    }
    if (tryRead("ANY")) {
      setElement(name, CONTENT_ANY, null, null);
      return;
    }

    // Parenthesised forms: the opening '(' goes into the buffer here.
    require('(');
    dataBufferAppend('(');
    skipWhitespace();

    if (tryRead("#PCDATA")) {
      // Mixed content.
      dataBufferAppend("#PCDATA");
      parseMixed();
      String mixedModel = dataBufferToString();
      setElement(name, CONTENT_MIXED, mixedModel, null);
    } else {
      // Element content.
      parseElements();
      String elementModel = dataBufferToString();
      setElement(name, CONTENT_ELEMENTS, elementModel, null);
    }
  }


  /**
    * Parse an element-content model, appending its text to the data buffer.
    * [42] elements ::= (choice | seq) ('?' | '*' | '+')?
    * [44] cps ::= S? %cp S?
    * [45] choice ::= '(' S? %ctokplus (S? '|' S? %ctoks)* S? ')'
    * [46] ctokplus ::= cps ('|' cps)+
    * [47] ctoks ::= cps ('|' cps)*
    * [48] seq ::= '(' S? %stoks (S? ',' S? %stoks)* S? ')'
    * [49] stoks ::= cps (',' cps)*
    * The first separator seen (',' or '|') fixes the separator for the
    * whole group; a different one later is reported as an error.
    * *NOTE: the opening '(' and S have already been read.
    * *TODO: go over parameter entity boundaries more carefully.
    */
  void parseElements() throws java.lang.Exception {
    char separator = 0;
    boolean separatorSeen = false;
    char c;

    // One content particle per pass, followed by ')' or a separator.
    while (true) {
      skipWhitespace();
      parseCp();
      skipWhitespace();

      c = readCh();
      if (c == ')') {
        dataBufferAppend(')');
        break;
      }

      if (!separatorSeen) {
        // The first separator decides between a sequence and a choice.
        if (c != ',' && c != '|') {
          error("bad separator in content model", c, null);
          return;
        }
        separator = c;
        separatorSeen = true;
      } else if (c != separator) {
        error("bad separator in content model", c, null);
        return;
      }
      dataBufferAppend(c);
    }

    // Optional occurrence indicator after the closing ')'.
    c = readCh();
    if (c == '?' || c == '*' || c == '+') {
      dataBufferAppend(c);
    } else {
      unread(c);
    }
  }


  /**
    * Parse a single content particle, appending its text to the data buffer.
    * [43] cp ::= (Name | choice | seq) ('?' | '*' | '+')
    * *NOTE: the production actually implemented is slightly different:
    *        cp ::= (elements | (Name ('?' | '*' | '+')?))
    */
  void parseCp() throws java.lang.Exception {
    // A nested group is delegated to parseElements(), which also deals
    // with the group's own occurrence indicator.
    if (tryRead('(')) {
      dataBufferAppend('(');
      parseElements();
      return;
    }

    // Otherwise: a name, optionally followed by an occurrence indicator.
    dataBufferAppend(readNmtoken(true));
    final char next = readCh();
    if (next == '?' || next == '*' || next == '+') {
      dataBufferAppend(next);
    } else {
      unread(next);
    }
  }


  /**
    * Parse a mixed-content model, appending its text to the data buffer.
    * [50] Mixed ::= '(' S? %( %'#PCDATA' (S? '|' S? %Mtoks)* ) S? ')*'
    *              | '(' S? %('#PCDATA') S? ')'
    * [51] Mtoks ::= %Name (S? '|' S? %Name)*
    * The buffered model always ends in ")*", including the "#PCDATA"-only
    * form, whether or not the input carried the trailing '*'.
    * *NOTE: the S and '#PCDATA' have already been read.
    */
  void parseMixed() throws java.lang.Exception {
    // "#PCDATA" on its own: the '*' after ')' is optional in the input.
    skipWhitespace();
    if (tryRead(')')) {
      dataBufferAppend(")*");
      tryRead('*');
      return;
    }

    // "#PCDATA" followed by '|'-separated names, closed by ")*".
    skipWhitespace();
    for (;;) {
      if (tryRead(")*")) {
        break;
      }
      require('|');
      dataBufferAppend('|');
      skipWhitespace();
      dataBufferAppend(readNmtoken(true));
      skipWhitespace();
    }
    dataBufferAppend(")*");
  }


  /**
    * Parse an attribute list declaration.
    * [52] AttlistDecl ::= '<!ATTLIST' S %Name S? %AttDef+ S? '>'
    * Each attribute definition found before the closing '>' is handed to
    * parseAttDef() together with the element name.
    * *NOTE: the '<!ATTLIST' has already been read.
    */
  void parseAttlistDecl() throws java.lang.Exception {
    // Whitespace is mandatory between the keyword and the element name.
    requireWhitespace();
    final String elementName = readNmtoken(true);

    // Whitespace after the name is optional ("S?" in the production), so
    // a declaration that closes right after the name, such as
    // "<!ATTLIST foo>", must not be rejected.  Requiring whitespace here
    // accepted "<!ATTLIST foo >" but reported an error for "<!ATTLIST foo>".
    // NOTE(review): assumes skipWhitespace() is the optional counterpart of
    // requireWhitespace(), as its use for "S?" elsewhere in this file suggests.
    skipWhitespace();

    while (!tryRead('>')) {
      parseAttDef(elementName);
      skipWhitespace();
    }
  }


  /**
    * Parse a single attribute definition.
    * [53] AttDef ::= S %Name S %AttType S %Default
    */
  void parseAttDef (String elementName)
    throws java.lang.Exception
  {
    String name;
    int type;
    String enum = null;

				// Read the attribute name.
    name = readNmtoken(true);

				// Read the attribute type.
    requireWhitespace();
    type = readAttType();

				// Get the string of enumerated values
				// if necessary.
    if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
      enum = dataBufferToString();
    }

				// Read the default value.
    requireWhitespace();
    parseDefault(elementName, name, type, enum);
  }


  /**
    * Read an attribute type and return its numeric code.
    * [54] AttType ::= StringType | TokenizedType | EnumeratedType
    * [55] StringType ::= 'CDATA'
    * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' |
    *                        'NMTOKEN' | 'NMTOKENS'
    * [57] EnumeratedType ::= NotationType | Enumeration
    * A leading '(' means an enumeration; otherwise the type keyword is
    * looked up in attributeTypeHash.  An unknown keyword is reported through
    * error() and, if that returns, ATTRIBUTE_UNDECLARED is returned.
    * *TODO: validate the type!!
    */
  int readAttType() throws java.lang.Exception {
    // Enumeration: the value list is parsed straight away.
    if (tryRead('(')) {
      parseEnumeration();
      return ATTRIBUTE_ENUMERATED;
    }

    final String typeName = readNmtoken(true);
    // A NOTATION type is followed by its own parenthesised name list.
    if (typeName.equals("NOTATION")) {
      parseNotationType();
    }

    final Integer code = (Integer) attributeTypeHash.get(typeName);
    if (code != null) {
      return code.intValue();
    }
    error("illegal attribute type", typeName, null);
    return ATTRIBUTE_UNDECLARED;
  }


  /**
    * Parse an enumeration, appending its text to the data buffer.
    * [60] Enumeration ::= '(' S? %Etoks (S? '|' S? %Etoks)* S? ')'
    * [61] Etoks ::= %Nmtoken (S? '|' S? %Nmtoken)*
    * The buffered form is "(tok|tok|...)" with the whitespace around the
    * tokens and separators left out.
    * *NOTE: the '(' has already been read.
    */
  void parseEnumeration() throws java.lang.Exception {
    dataBufferAppend('(');

    // One token per pass; each token is followed by ')' or by '|'.
    while (true) {
      skipWhitespace();
      dataBufferAppend(readNmtoken(true));
      skipWhitespace();
      if (tryRead(')')) {
        break;
      }
      require('|');
      dataBufferAppend('|');
    }

    dataBufferAppend(')');
  }


  /**
    * Parse the name list that follows the NOTATION keyword of an attribute type.
    * [58] NotationType ::= %'NOTATION' S '(' S? %Ntoks (S? '|' S? %Ntoks)*
    *                       S? ')'
    * [59] Ntoks ::= %Name (S? '|' S? %Name)
    * After the mandatory whitespace and '(', the list is read by
    * parseEnumeration(), i.e. exactly like an ordinary enumeration.
    * *NOTE: the 'NOTATION' has already been read
    */
  void parseNotationType() throws java.lang.Exception {
    // Mandatory whitespace and opening parenthesis after the keyword.
    requireWhitespace();
    require('(');
    // The remainder has the same shape as an enumeration.
    parseEnumeration();
  }


  /**
    * Parse the default value for an attribute.
    * [62] Default ::= '#REQUIRED' | '#IMPLIED' | ((%'#FIXED' S)? %AttValue
    */
  void parseDefault (String elementName, String name, int type, String enum)
    throws java.lang.Exception
  {
    int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
    String value = null;
    boolean normalizeWSFlag;

    if (tryRead('#')) {
      if (tryRead("FIXED")) {
	valueType = ATTRIBUTE_DEFAULT_FIXED;
	requireWhitespace();
	context = CONTEXT_ATTRIBUTEVALUE;
	value = readLiteral(LIT_CHAR_REF);
	context = CONTEXT_DTD;
      } else if (tryRead("REQUIRED")) {
	valueType = ATTRIBUTE_DEFAULT_REQUIRED;
      } else if (tryRead("IMPLIED")) {
	valueType = ATTRIBUTE_DEFAULT_IMPLIED;
      } else {
	error("illegal keyword for attribute default value", null, null);
      }
    } else {
      context = CONTEXT_ATTRIBUTEVALUE;
      value = readLiteral(LIT_CHAR_REF);
      context = CONTEXT_DTD;
    }
    setAttribute(elementName, name, type, enum, value, valueType);
  }


  /**
    * Parse a conditional section.
    * [63] conditionalSect ::= includeSect || ignoreSect
    * [64] includeSect ::= '<![' %'INCLUDE' '[' (%markupdecl*)* ']]>'
    * [65] ignoreSect ::= '<![' %'IGNORE' '[' ignoreSectContents* ']]>'
    * [66] ignoreSectContents ::= ((SkipLit | Comment | PI) -(Char* ']]>'))
    *                           | ('<![' ignoreSectContents* ']]>')
    *                           | (Char - (']' | [<'"]))
    *                           | ('<!' (Char - ('-' | '[')))
    * An INCLUDE section has its markup declarations parsed normally.  An
    * IGNORE section is skipped character by character, counting nested
    * "<![" openers so that only the matching "]]>" ends it.
    * *NOTE: the '<![' has already been read.
    * *TODO: verify that ignoreSectContents is handled right (literals,
    *        comments and PIs inside an ignored section get no special
    *        treatment here).
    */
  void parseConditionalSect() throws java.lang.Exception {
    skipWhitespace();
    if (tryRead("INCLUDE")) {
      skipWhitespace();
      require('[');
      skipWhitespace();
      while (!tryRead("]]>")) {
        parseMarkupdecl();
        skipWhitespace();
      }
    } else if (tryRead("IGNORE")) {
      skipWhitespace();
      require('[');
      // Nesting depth of conditional sections; 1 is the section being skipped.
      int depth = 1;
      while (depth > 0) {
        char c = readCh();
        switch (c) {
        case '<':
          if (tryRead("![")) {
            depth++;
          }
          // This case must not fall through into the ']' case: without the
          // break, "<]>" (or "<![" directly followed by "]>") was counted
          // as a section close although no "]]>" was present.
          break;
        case ']':
          if (tryRead("]>")) {
            depth--;
          }
          break;
        default:
          // Any other character is ignored content.
          break;
        }
      }
    } else {
      error("conditional section must begin with INCLUDE or IGNORE",
            null, null);
    }
  }


  /**
    * Read a character reference.
    * [67] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
    * *NOTE: the '&#' has already been read.
    */
  void parseCharRef ()
    throws java.lang.Exception
  {
    int value = 0;
    char c;

    if (tryRead('x')) {
      loop1: while (true) {
	c = readCh();
	switch (c) {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -