📄 xmlparser.java
字号:
// Handle delimiters c = readCh(); switch (c) { case '&': // Found "&" c = readCh(); if (c == '#') { parseCharRef(); } else { unread(c); parseEntityRef(true); } break; case '<': // Found "<" c = readCh(); switch (c) { case '!': // Found "<!" c = readCh(); switch (c) { case '-': // Found "<!-" require('-'); parseComment(); break; case '[': // Found "<![" require("CDATA["); parseCDSect(); break; default: error("expected comment or CDATA section", c, null); break; } break; case '?': // Found "<?" dataBufferFlush(); parsePI(); break; case '/': // Found "</" dataBufferFlush(); parseETag(); return; default: // Found "<" followed by something else dataBufferFlush(); unread(c); parseElement(); break; } } } } /** * Parse an element type declaration. * [40] elementdecl ::= '<!ELEMENT' S %Name S (%S S)? %contentspec S? '>' * [VC: Unique Element Declaration] * *NOTE: the '<!ELEMENT' has already been read. */ void parseElementdecl () throws java.lang.Exception { String name; requireWhitespace(); // Read the element type name. name = readNmtoken(true); requireWhitespace(); // Read the content model. parseContentspec(name); skipWhitespace(); require('>'); } /** * Content specification. * [41] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements */ void parseContentspec (String name) throws java.lang.Exception { if (tryRead("EMPTY")) { setElement(name, CONTENT_EMPTY, null, null); return; } else if (tryRead("ANY")) { setElement(name, CONTENT_ANY, null, null); return; } else { require('('); dataBufferAppend('('); skipWhitespace(); if (tryRead("#PCDATA")) { dataBufferAppend("#PCDATA"); parseMixed(); setElement(name, CONTENT_MIXED, dataBufferToString(), null); } else { parseElements(); setElement(name, CONTENT_ELEMENTS, dataBufferToString(), null); } } } /** * Parse an element-content model. * [42] elements ::= (choice | seq) ('?' | '*' | '+')? * [44] cps ::= S? %cp S? * [45] choice ::= '(' S? %ctokplus (S? '|' S? %ctoks)* S? ')' * [46] ctokplus ::= cps ('|' cps)+ * [47] ctoks ::= cps ('|' cps)* * [48] seq ::= '(' S? %stoks (S? ',' S? %stoks)* S? ')' * [49] stoks ::= cps (',' cps)* * *NOTE: the opening '(' and S have already been read. * *TODO: go over parameter entity boundaries more carefully. */ void parseElements () throws java.lang.Exception { char c; char sep; // Parse the first content particle skipWhitespace(); parseCp(); // Check for end or for a separator. skipWhitespace(); c = readCh(); switch (c) { case ')': dataBufferAppend(')'); c = readCh(); switch (c) { case '*': case '+': case '?': dataBufferAppend(c); break; default: unread(c); } return; case ',': // Register the separator. case '|': sep = c; dataBufferAppend(c); break; default: error("bad separator in content model", c, null); return; } // Parse the rest of the content model. while (true) { skipWhitespace(); parseCp(); skipWhitespace(); c = readCh(); if (c == ')') { dataBufferAppend(')'); break; } else if (c != sep) { error("bad separator in content model", c, null); return; } else { dataBufferAppend(c); } } // Check for the occurrence indicator. c = readCh(); switch (c) { case '?': case '*': case '+': dataBufferAppend(c); return; default: unread(c); return; } } /** * Parse a content particle. * [43] cp ::= (Name | choice | seq) ('?' | '*' | '+') * *NOTE: I actually use a slightly different production here: * cp ::= (elements | (Name ('?' | '*' | '+')?)) */ void parseCp () throws java.lang.Exception { char c; if (tryRead('(')) { dataBufferAppend('('); parseElements(); } else { dataBufferAppend(readNmtoken(true)); c = readCh(); switch (c) { case '?': case '*': case '+': dataBufferAppend(c); break; default: unread(c); break; } } } /** * Parse mixed content. * [50] Mixed ::= '(' S? %( %'#PCDATA' (S? '|' S? %Mtoks)* ) S? ')*' * | '(' S? %('#PCDATA') S? ')' * [51] Mtoks ::= %Name (S? '|' S? %Name)* * *NOTE: the S and '#PCDATA' have already been read. */ void parseMixed () throws java.lang.Exception { char c; // Check for PCDATA alone. skipWhitespace(); if (tryRead(')')) { dataBufferAppend(")*"); tryRead('*'); return; } // Parse mixed content. skipWhitespace(); while (!tryRead(")*")) { require('|'); dataBufferAppend('|'); skipWhitespace(); dataBufferAppend(readNmtoken(true)); skipWhitespace(); } dataBufferAppend(")*"); } /** * Parse an attribute list declaration. * [52] AttlistDecl ::= '<!ATTLIST' S %Name S? %AttDef+ S? '>' * *NOTE: the '<!ATTLIST' has already been read. */ void parseAttlistDecl () throws java.lang.Exception { String elementName; requireWhitespace(); elementName = readNmtoken(true); requireWhitespace(); while (!tryRead('>')) { parseAttDef(elementName); skipWhitespace(); } } /** * Parse a single attribute definition. * [53] AttDef ::= S %Name S %AttType S %Default */ void parseAttDef (String elementName) throws java.lang.Exception { String name; int type; String enumeration = null; // Read the attribute name. name = readNmtoken(true); // Read the attribute type. requireWhitespace(); type = readAttType(); // Get the string of enumerated values // if necessary. if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) { enumeration = dataBufferToString(); } // Read the default value. requireWhitespace(); parseDefault(elementName, name, type, enumeration); } /** * Parse the attribute type. * [54] AttType ::= StringType | TokenizedType | EnumeratedType * [55] StringType ::= 'CDATA' * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | * 'NMTOKEN' | 'NMTOKENS' * [57] EnumeratedType ::= NotationType | Enumeration * *TODO: validate the type!! */ int readAttType () throws java.lang.Exception { String typeString; Integer type; if (tryRead('(')) { parseEnumeration(); return ATTRIBUTE_ENUMERATED; } else { typeString = readNmtoken(true); if (typeString.equals("NOTATION")) { parseNotationType(); } type = (Integer)attributeTypeHash.get(typeString); if (type == null) { error("illegal attribute type", typeString, null); return ATTRIBUTE_UNDECLARED; } else { return type.intValue(); } } } /** * Parse an enumeration. * [60] Enumeration ::= '(' S? %Etoks (S? '|' S? %Etoks)* S? ')' * [61] Etoks ::= %Nmtoken (S? '|' S? %Nmtoken)* * *NOTE: the '(' has already been read. */ void parseEnumeration () throws java.lang.Exception { char c; dataBufferAppend('('); // Read the first token. skipWhitespace(); dataBufferAppend(readNmtoken(true)); // Read the remaining tokens. skipWhitespace(); while (!tryRead(')')) { require('|'); dataBufferAppend('|'); skipWhitespace(); dataBufferAppend(readNmtoken(true)); skipWhitespace(); } dataBufferAppend(')'); } /** * Parse a notation type for an attribute. * [58] NotationType ::= %'NOTATION' S '(' S? %Ntoks (S? '|' S? %Ntoks)* * S? ')' * [59] Ntoks ::= %Name (S? '|' S? %Name) * *NOTE: the 'NOTATION' has already been read */ void parseNotationType () throws java.lang.Exception { requireWhitespace(); require('('); parseEnumeration(); } /** * Parse the default value for an attribute. * [62] Default ::= '#REQUIRED' | '#IMPLIED' | ((%'#FIXED' S)? %AttValue */ void parseDefault (String elementName, String name, int type, String enumeration) throws java.lang.Exception { int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; String value = null; boolean normalizeWSFlag; if (tryRead('#')) { if (tryRead("FIXED")) { valueType = ATTRIBUTE_DEFAULT_FIXED; requireWhitespace(); context = CONTEXT_ATTRIBUTEVALUE; value = readLiteral(LIT_CHAR_REF); context = CONTEXT_DTD; } else if (tryRead("REQUIRED")) { valueType = ATTRIBUTE_DEFAULT_REQUIRED; } else if (tryRead("IMPLIED")) { valueType = ATTRIBUTE_DEFAULT_IMPLIED; } else { error("illegal keyword for attribute default value", null, null); } } else { context = CONTEXT_ATTRIBUTEVALUE; value = readLiteral(LIT_CHAR_REF); context = CONTEXT_DTD; } setAttribute(elementName, name, type, enumeration, value, valueType); } /** * Parse a conditional section. * [63] conditionalSect ::= includeSect || ignoreSect * [64] includeSect ::= '<![' %'INCLUDE' '[' (%markupdecl*)* ']]>' * [65] ignoreSect ::= '<![' %'IGNORE' '[' ignoreSectContents* ']]>' * [66] ignoreSectContents ::= ((SkipLit | Comment | PI) -(Char* ']]>')) * | ('<![' ignoreSectContents* ']]>') * | (Char - (']' | [<'"])) * | ('<!' (Char - ('-' | '['))) * *NOTE: the '<![' has already been read. * *TODO: verify that I am handling ignoreSectContents right. */ void parseConditionalSect () throws java.lang.Exception { skipWhitespace(); if (tryRead("INCLUDE")) { skipWhitespace(); require('['); skipWhitespace(); while (!tryRead("]]>")) { parseMarkupdecl(); skipWhitespace(); } } else if (tryRead("IGNORE")) { skipWhitespace(); require('['); int nesting = 1; char c; for (int nest = 1; nest > 0; ) { c = readCh(); switch (c) { case '<': if (tryRead("![")) { nest++; } case ']': if (tryRead("]>")) { nest--; } } } } else { error("conditional section must begin with INCLUDE or IGNORE", null, null); } } /** * Read a character reference. * [67] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' * *NOTE: the '&#' has already been read. */ void parseCharRef () throws java.lang.Exception { int value = 0; char c; if (tryRead('x')) { loop1: while (true) { c = readCh(); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': value *= 16; value += Integer.parseInt(new Character(c).toString(), 16); break; case ';': break loop1; default: error("illegal character in character reference", c, null); break loop1; } } } else { loop2: while (true) { c = readCh(); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': value *= 10; value += Integer.parseInt(new Character(c).toString(), 10); break; case ';': break loop2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -