📄 xmlparser.java
字号:
if (name != currentElement) {
error("mismatched end tag", name, currentElement);
}
skipWhitespace();
require('>');
if (handler != null) {
handler.endElement(name);
}
}
/**
 * Parse everything between an element's start tag and its end tag.
 * [37] content ::= (element | PCData | Reference | CDSect | PI | Comment)*
 * [68] Reference ::= EntityRef | CharRef
 *
 * Each pass first consumes character data (ANY/MIXED content) or
 * whitespace (element-only content), then dispatches on the delimiter
 * that follows.  The method returns once the matching end tag has
 * been handed to parseETag().
 */
void parseContent ()
  throws java.lang.Exception
{
  char ch;

  for (;;) {
    // Leading text: character data for ANY/MIXED models, whitespace
    // for element-only models, nothing for any other model.
    if (currentElementContent == CONTENT_ANY
        || currentElementContent == CONTENT_MIXED) {
      parsePCData();
    } else if (currentElementContent == CONTENT_ELEMENTS) {
      parseWhitespace();
    }

    // Look at the delimiter that stopped the text scan.
    ch = readCh();

    if (ch == '&') {
      // "&#" introduces a character reference, a bare "&" an entity
      // reference.
      ch = readCh();
      if (ch != '#') {
        unread(ch);
        parseEntityRef(true);
      } else {
        parseCharRef();
      }
      continue;
    }

    if (ch != '<') {
      // Anything else is simply consumed; go round again.
      continue;
    }

    // Found "<": decide what kind of markup follows.
    ch = readCh();
    if (ch == '!') {
      // "<!" must open either a comment or a CDATA section.
      ch = readCh();
      if (ch == '-') {
        require('-');
        parseComment();
      } else if (ch == '[') {
        require("CDATA[");
        parseCDSect();
      } else {
        error("expected comment or CDATA section", ch, null);
      }
    } else if (ch == '?') {
      // "<?": processing instruction.
      dataBufferFlush();
      parsePI();
    } else if (ch == '/') {
      // "</": end tag, which finishes this element's content.
      dataBufferFlush();
      parseETag();
      return;
    } else {
      // "<" followed by anything else starts a child element.
      dataBufferFlush();
      unread(ch);
      parseElement();
    }
  }
}
/**
 * Parse an element type declaration.
 * [40] elementdecl ::= '<!ELEMENT' S %Name S (%S S)? %contentspec S? '>'
 * [VC: Unique Element Declaration]
 * *NOTE: the '<!ELEMENT' keyword has already been consumed by the caller.
 *
 * Reads the element type name, delegates the content model to
 * parseContentspec(), and then expects the closing '>'.
 */
void parseElementdecl ()
  throws java.lang.Exception
{
  // Whitespace is mandatory on both sides of the element type name.
  requireWhitespace();
  final String elementName = readNmtoken(true);
  requireWhitespace();

  // The content model is recorded against the name just read.
  parseContentspec(elementName);

  // Optional whitespace, then the end of the declaration.
  skipWhitespace();
  require('>');
}
/**
 * Parse a content specification and register it for an element.
 * [41] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
 *
 * For the two keyword forms the element is registered with no model
 * string.  For the parenthesised forms the model text is accumulated
 * in the data buffer (starting with the opening '(') and passed to
 * setElement() as a string.
 *
 * @param name the element type name the specification belongs to.
 */
void parseContentspec (String name)
  throws java.lang.Exception
{
  // Keyword forms carry no content-model string.
  if (tryRead("EMPTY")) {
    setElement(name, CONTENT_EMPTY, null, null);
    return;
  }
  if (tryRead("ANY")) {
    setElement(name, CONTENT_ANY, null, null);
    return;
  }

  // Otherwise a parenthesised model must follow.
  require('(');
  dataBufferAppend('(');
  skipWhitespace();

  // A leading "#PCDATA" distinguishes mixed content from element content.
  final boolean mixed = tryRead("#PCDATA");
  if (mixed) {
    dataBufferAppend("#PCDATA");
    parseMixed();
  } else {
    parseElements();
  }
  setElement(name,
             mixed ? CONTENT_MIXED : CONTENT_ELEMENTS,
             dataBufferToString(),
             null);
}
/**
 * Parse an element-content model.
 * [42] elements ::= (choice | seq) ('?' | '*' | '+')?
 * [44] cps ::= S? %cp S?
 * [45] choice ::= '(' S? %ctokplus (S? '|' S? %ctoks)* S? ')'
 * [46] ctokplus ::= cps ('|' cps)+
 * [47] ctoks ::= cps ('|' cps)*
 * [48] seq ::= '(' S? %stoks (S? ',' S? %stoks)* S? ')'
 * [49] stoks ::= cps (',' cps)*
 * *NOTE: the opening '(' and S have already been read.
 * *TODO: go over parameter entity boundaries more carefully.
 *
 * The model text (minus whitespace) is appended to the data buffer as
 * it is recognised.  The first separator seen (',' or '|') fixes the
 * separator for the whole group; any other separator is reported
 * through error().
 */
void parseElements ()
  throws java.lang.Exception
{
  // First content particle of the group.
  skipWhitespace();
  parseCp();
  skipWhitespace();

  char ch = readCh();

  // A group holding a single particle: close it and pick up an
  // optional occurrence indicator.
  if (ch == ')') {
    dataBufferAppend(')');
    ch = readCh();
    if (ch == '*' || ch == '+' || ch == '?') {
      dataBufferAppend(ch);
    } else {
      unread(ch);
    }
    return;
  }

  // Otherwise the character must be one of the two legal separators.
  if (ch != ',' && ch != '|') {
    error("bad separator in content model", ch, null);
    return;
  }
  final char separator = ch;
  dataBufferAppend(ch);

  // Remaining particles, each followed by the same separator or ')'.
  for (;;) {
    skipWhitespace();
    parseCp();
    skipWhitespace();
    ch = readCh();
    if (ch == ')') {
      dataBufferAppend(')');
      break;
    }
    if (ch != separator) {
      error("bad separator in content model", ch, null);
      return;
    }
    dataBufferAppend(ch);
  }

  // Optional occurrence indicator after the closing parenthesis.
  ch = readCh();
  if (ch == '?' || ch == '*' || ch == '+') {
    dataBufferAppend(ch);
  } else {
    unread(ch);
  }
}
/**
 * Parse a single content particle.
 * [43] cp ::= (Name | choice | seq) ('?' | '*' | '+')
 * *NOTE: a slightly different production is actually used here:
 * cp ::= (elements | (Name ('?' | '*' | '+')?))
 *
 * The particle's text is appended to the data buffer.
 */
void parseCp ()
  throws java.lang.Exception
{
  // A nested group: parseElements() handles everything after the '('.
  if (tryRead('(')) {
    dataBufferAppend('(');
    parseElements();
    return;
  }

  // A plain name, optionally followed by an occurrence indicator.
  dataBufferAppend(readNmtoken(true));
  final char suffix = readCh();
  if (suffix == '?' || suffix == '*' || suffix == '+') {
    dataBufferAppend(suffix);
  } else {
    // Not an indicator: leave it for the caller.
    unread(suffix);
  }
}
/**
 * Parse the remainder of a mixed-content model.
 * [50] Mixed ::= '(' S? %( %'#PCDATA' (S? '|' S? %Mtoks)* ) S? ')*'
 * | '(' S? %('#PCDATA') S? ')'
 * [51] Mtoks ::= %Name (S? '|' S? %Name)*
 * *NOTE: the S and '#PCDATA' have already been read.
 *
 * In both forms the text appended to the data buffer ends with ")*";
 * for the "#PCDATA alone" form a trailing '*' in the input is optional
 * and is consumed if present.
 */
void parseMixed ()
  throws java.lang.Exception
{
  skipWhitespace();

  // "#PCDATA" on its own: ')' with an optional '*'.
  if (tryRead(')')) {
    dataBufferAppend(")*");
    tryRead('*');
    return;
  }

  // "#PCDATA" followed by '|'-separated names, terminated by ")*".
  skipWhitespace();
  for (;;) {
    if (tryRead(")*")) {
      break;
    }
    require('|');
    dataBufferAppend('|');
    skipWhitespace();
    dataBufferAppend(readNmtoken(true));
    skipWhitespace();
  }
  dataBufferAppend(")*");
}
/**
 * Parse an attribute list declaration.
 * [52] AttlistDecl ::= '<!ATTLIST' S %Name S? %AttDef+ S? '>'
 * *NOTE: the '<!ATTLIST' has already been read.
 *
 * Reads the owning element's name and then one attribute definition
 * after another until the closing '>' is found.
 */
void parseAttlistDecl ()
  throws java.lang.Exception
{
  // Whitespace is required on both sides of the element name.
  requireWhitespace();
  final String owner = readNmtoken(true);
  requireWhitespace();

  // Each definition is parsed against the same owning element.
  for (;;) {
    if (tryRead('>')) {
      return;
    }
    parseAttDef(owner);
    skipWhitespace();
  }
}
/**
* Parse a single attribute definition.
* [53] AttDef ::= S %Name S %AttType S %Default
*/
void parseAttDef (String elementName)
throws java.lang.Exception
{
String name;
int type;
String enum = null;
// Read the attribute name.
name = readNmtoken(true);
// Read the attribute type.
requireWhitespace();
type = readAttType();
// Get the string of enumerated values
// if necessary.
if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
enum = dataBufferToString();
}
// Read the default value.
requireWhitespace();
parseDefault(elementName, name, type, enum);
}
/**
 * Read the type of an attribute and return its numeric code.
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 * [55] StringType ::= 'CDATA'
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' |
 * 'NMTOKEN' | 'NMTOKENS'
 * [57] EnumeratedType ::= NotationType | Enumeration
 * *TODO: validate the type!!
 *
 * @return ATTRIBUTE_ENUMERATED for a parenthesised enumeration,
 *         otherwise the code stored in attributeTypeHash for the type
 *         keyword; ATTRIBUTE_UNDECLARED if the keyword is unknown and
 *         error() returns.
 */
int readAttType ()
  throws java.lang.Exception
{
  // An opening parenthesis means a bare enumeration.
  if (tryRead('(')) {
    parseEnumeration();
    return ATTRIBUTE_ENUMERATED;
  }

  // Otherwise the type is given by a keyword.
  final String keyword = readNmtoken(true);
  if (keyword.equals("NOTATION")) {
    // NOTATION is followed by its own list of names.
    parseNotationType();
  }

  final Integer code = (Integer) attributeTypeHash.get(keyword);
  if (code != null) {
    return code.intValue();
  }
  error("illegal attribute type", keyword, null);
  return ATTRIBUTE_UNDECLARED;
}
/**
 * Parse an enumeration of name tokens.
 * [60] Enumeration ::= '(' S? %Etoks (S? '|' S? %Etoks)* S? ')'
 * [61] Etoks ::= %Nmtoken (S? '|' S? %Nmtoken)*
 * *NOTE: the '(' has already been read.
 *
 * The enumeration is appended to the data buffer in the form
 * "(a|b|c)", with whitespace removed.
 */
void parseEnumeration ()
  throws java.lang.Exception
{
  // The caller consumed the '(' but it still belongs in the buffer.
  dataBufferAppend('(');

  // At least one token is required.
  skipWhitespace();
  dataBufferAppend(readNmtoken(true));
  skipWhitespace();

  // Further tokens are each introduced by '|'; ')' ends the list.
  for (;;) {
    if (tryRead(')')) {
      break;
    }
    require('|');
    dataBufferAppend('|');
    skipWhitespace();
    dataBufferAppend(readNmtoken(true));
    skipWhitespace();
  }
  dataBufferAppend(')');
}
/**
 * Parse the name list that follows the NOTATION keyword in an
 * attribute type.
 * [58] NotationType ::= %'NOTATION' S '(' S? %Ntoks (S? '|' S? %Ntoks)*
 * S? ')'
 * [59] Ntoks ::= %Name (S? '|' S? %Name)
 * *NOTE: the 'NOTATION' keyword has already been read.
 *
 * After the mandatory whitespace and '(' the list is read by
 * parseEnumeration(), the same routine used for plain enumerations.
 */
void parseNotationType ()
  throws java.lang.Exception
{
  // S '('
  requireWhitespace();
  require('(');

  // name ('|' name)* ')'
  parseEnumeration();
}
/**
* Parse the default value for an attribute.
* [62] Default ::= '#REQUIRED' | '#IMPLIED' | ((%'#FIXED' S)? %AttValue
*/
void parseDefault (String elementName, String name, int type, String enum)
throws java.lang.Exception
{
int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
String value = null;
boolean normalizeWSFlag;
if (tryRead('#')) {
if (tryRead("FIXED")) {
valueType = ATTRIBUTE_DEFAULT_FIXED;
requireWhitespace();
context = CONTEXT_ATTRIBUTEVALUE;
value = readLiteral(LIT_CHAR_REF);
context = CONTEXT_DTD;
} else if (tryRead("REQUIRED")) {
valueType = ATTRIBUTE_DEFAULT_REQUIRED;
} else if (tryRead("IMPLIED")) {
valueType = ATTRIBUTE_DEFAULT_IMPLIED;
} else {
error("illegal keyword for attribute default value", null, null);
}
} else {
context = CONTEXT_ATTRIBUTEVALUE;
value = readLiteral(LIT_CHAR_REF);
context = CONTEXT_DTD;
}
setAttribute(elementName, name, type, enum, value, valueType);
}
/**
 * Parse a conditional section.
 * [63] conditionalSect ::= includeSect || ignoreSect
 * [64] includeSect ::= '<![' %'INCLUDE' '[' (%markupdecl*)* ']]>'
 * [65] ignoreSect ::= '<![' %'IGNORE' '[' ignoreSectContents* ']]>'
 * [66] ignoreSectContents ::= ((SkipLit | Comment | PI) -(Char* ']]>'))
 * | ('<![' ignoreSectContents* ']]>')
 * | (Char - (']' | [<'"]))
 * | ('<!' (Char - ('-' | '[')))
 * *NOTE: the '<![' has already been read.
 * *TODO: verify that I am handling ignoreSectContents right.
 *
 * An INCLUDE section is parsed as a sequence of markup declarations
 * up to the closing "]]>".  An IGNORE section is skipped character by
 * character while tracking nesting: every "<![" opens a level and
 * every "]]>" closes one; the section ends when the depth reaches
 * zero.
 */
void parseConditionalSect ()
  throws java.lang.Exception
{
  skipWhitespace();
  if (tryRead("INCLUDE")) {
    skipWhitespace();
    require('[');
    skipWhitespace();
    while (!tryRead("]]>")) {
      parseMarkupdecl();
      skipWhitespace();
    }
  } else if (tryRead("IGNORE")) {
    skipWhitespace();
    require('[');
    char c;
    // The depth starts at 1 for the section being skipped.
    for (int nest = 1; nest > 0; ) {
      c = readCh();
      switch (c) {
      case '<':
        // "<![" opens a nested conditional section.
        if (tryRead("![")) {
          nest++;
        }
        // Without this break control fell through to the ']' case,
        // so a '<' followed by "]>" was miscounted as a close.
        break;
      case ']':
        // "]]>" closes the innermost open section.
        if (tryRead("]>")) {
          nest--;
        }
        break;
      }
    }
  } else {
    error("conditional section must begin with INCLUDE or IGNORE",
          null, null);
  }
}
/**
* Read a character reference.
* [67] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
* *NOTE: the '&#' has already been read.
*/
void parseCharRef ()
throws java.lang.Exception
{
int value = 0;
char c;
if (tryRead('x')) {
loop1: while (true) {
c = readCh();
switch (c) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -