📄 xmlparser.java
字号:
skipUntil("-->");
}
/**
* Parse a processing instruction and do a call-back.
* <pre>
* [19] PI ::= '<?' Name (S (Char* - (Char* '?>' Char*)))? '?>'
* </pre>
* <p>(The <code><?</code> has already been read.)
* <p>An XML processing instruction <em>must</em> begin with
* a Name, which is the instruction's target.
*/
void parsePI ()
    throws java.lang.Exception
{
    // The instruction's target name always comes first.
    String target = readNmtoken(true);

    // If the PI is not closed right after the target, whitespace must
    // follow and everything up to the closing "?>" is consumed.
    boolean closedImmediately = tryRead("?>");
    if (!closedImmediately) {
        requireWhitespace();
        parseUntil("?>");
    }

    // Without a handler there is nobody to report the instruction to.
    if (handler == null) {
        return;
    }
    handler.processingInstruction(target, dataBufferToString());
}
/**
* Parse a CDATA marked section.
* <pre>
* [20] CDSect ::= CDStart CData CDEnd
* [21] CDStart ::= '<![CDATA['
* [22] CData ::= (Char* - (Char* ']]>' Char*))
* [23] CDEnd ::= ']]>'
* </pre>
* <p>(The '<![CDATA[' has already been read.)
* <p>Note that this just appends characters to the dataBuffer,
* without actually generating an event.
*/
void parseCDSect ()
    throws java.lang.Exception
{
    // Consume input up to the CDEnd delimiter. No handler event is
    // raised from this method.
    final String cdEnd = "]]>";
    parseUntil(cdEnd);
}
/**
* Parse the prolog of an XML document.
* <pre>
* [24] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
* </pre>
* <p>There are a couple of tricks here. First, it is necessary to
* declare the XML default attributes after the DTD (if present)
* has been read. Second, it is not possible to expand general
* references in attribute value literals until after the entire
* DTD (if present) has been parsed.
* <p>We do not look for the XML declaration here, because it is
* handled by pushURL().
* @see #pushURL
*/
void parseProlog ()
    throws java.lang.Exception
{
    // Leading comments, processing instructions and whitespace.
    parseMisc();

    // No document type declaration: nothing more belongs to the prolog.
    if (!tryRead("<!DOCTYPE")) {
        return;
    }

    // DOCTYPE present: parse it, then any Misc items that follow it.
    parseDoctypedecl();
    parseMisc();
}
/**
 * Parse the XML declaration.
 * <pre>
 * [25] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 * [26] VersionInfo ::= S 'version' Eq ('"1.0"' | "'1.0'")
 * [33] SDDecl ::= S 'standalone' Eq "'" ('yes' | 'no') "'"
 * | S 'standalone' Eq '"' ("yes" | "no") '"'
 * [78] EncodingDecl ::= S 'encoding' Eq QEncoding
 * </pre>
 * <p>([80] to [82] are also significant.)
 * <p>(The <code><?xml</code> and whitespace have already been read.)
 * <p>The version must be <code>1.0</code>, and a standalone value, when
 * present, must be exactly <code>yes</code> or <code>no</code>; anything
 * else is reported through <code>error()</code>.
 * @param ignoreEncoding Passed through unchanged to checkEncoding()
 *        when an encoding declaration is present.
 * @see #parseTextDecl
 * @see #checkEncoding
 */
void parseXMLDecl (boolean ignoreEncoding)
    throws java.lang.Exception
{
    String version;
    String encodingName;
    String standalone;

    // Read the (mandatory) version.
    require("version");
    parseEq();
    version = readLiteral(0);
    if (!version.equals("1.0")) {
        error("unsupported XML version", version, "1.0");
    }

    // Try reading an encoding declaration.
    skipWhitespace();
    if (tryRead("encoding")) {
        parseEq();
        encodingName = readLiteral(0);
        checkEncoding(encodingName, ignoreEncoding);
    }

    // Try reading a standalone declaration. The grammar only allows
    // the case-sensitive values "yes" and "no".
    skipWhitespace();
    if (tryRead("standalone")) {
        parseEq();
        standalone = readLiteral(0);
        if (!"yes".equals(standalone) && !"no".equals(standalone)) {
            error("invalid standalone declaration", standalone, "yes or no");
        }
    }

    // Optional trailing whitespace, then the closing delimiter.
    skipWhitespace();
    require("?>");
}
/**
* Parse the Encoding PI.
* <pre>
* [78] EncodingDecl ::= S 'encoding' Eq QEncoding
* [79] EncodingPI ::= '<?xml' S 'encoding' Eq QEncoding S? '?>'
* [80] QEncoding ::= '"' Encoding '"' | "'" Encoding "'"
* [81] Encoding ::= LatinName
* [82] LatinName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
* </pre>
* <p>(The <code>&lt;?xml</code> and whitespace have already been read.)
* @see #parseXMLDecl
* @see #checkEncoding
*/
void parseTextDecl (boolean ignoreEncoding)
    throws java.lang.Exception
{
    // Optional version pseudo-attribute. When present it must be "1.0"
    // and must be followed by whitespace.
    if (tryRead("version")) {
        parseEq();
        String declaredVersion = readLiteral(0);
        boolean supported = declaredVersion.equals("1.0");
        if (!supported) {
            error("unsupported XML version", declaredVersion, "1.0");
        }
        requireWhitespace();
    }

    // Mandatory encoding pseudo-attribute.
    require("encoding");
    parseEq();
    String declaredEncoding = readLiteral(0);
    checkEncoding(declaredEncoding, ignoreEncoding);

    // Optional trailing whitespace, then the closing delimiter.
    skipWhitespace();
    require("?>");
}
/**
* Check that the encoding specified makes sense.
* <p>Compare what the author has specified in the XML declaration
* or encoding PI with what we have detected.
* <p>This is also important for distinguishing among the various
* 7- and 8-bit encodings, such as ISO-LATIN-1 (I cannot autodetect
* those).
* @param encodingName The name of the encoding specified by the user.
* @see #parseXMLDecl
* @see #parseTextDecl
*/
void checkEncoding (String encodingName, boolean ignoreEncoding)
    throws java.lang.Exception
{
    // Encoding names are ASCII, so fold case with a fixed locale.
    // The default-locale toUpperCase() maps 'i' to a dotted capital I
    // under a Turkish locale, which would make "iso-8859-1" fail to
    // match "ISO-8859-1" below.
    encodingName = encodingName.toUpperCase(java.util.Locale.ENGLISH);

    // The caller asked for the declared encoding to be disregarded.
    if (ignoreEncoding) {
        return;
    }

    // Compare the declared name against the current value of the
    // 'encoding' field.
    switch (encoding) {
    // 8-bit encodings
    case ENCODING_UTF_8:
        if (encodingName.equals("ISO-8859-1")) {
            // Switch the field over to the declared 8-bit encoding.
            encoding = ENCODING_ISO_8859_1;
        } else if (!encodingName.equals("UTF-8")) {
            error("unsupported 8-bit encoding",
                  encodingName,
                  "UTF-8 or ISO-8859-1");
        }
        break;
    // 16-bit encodings
    case ENCODING_UCS_2_12:
    case ENCODING_UCS_2_21:
        if (!encodingName.equals("ISO-10646-UCS-2") &&
            !encodingName.equals("UTF-16")) {
            // Name both accepted encodings in the hint, as the
            // 8-bit branch does.
            error("unsupported 16-bit encoding",
                  encodingName,
                  "ISO-10646-UCS-2 or UTF-16");
        }
        break;
    // 32-bit encodings
    case ENCODING_UCS_4_1234:
    case ENCODING_UCS_4_4321:
    case ENCODING_UCS_4_2143:
    case ENCODING_UCS_4_3412:
        if (!encodingName.equals("ISO-10646-UCS-4")) {
            error("unsupported 32-bit encoding",
                  encodingName,
                  "ISO-10646-UCS-4");
        }
        break;
    }
}
/**
* Parse miscellaneous markup outside the document element and DOCTYPE
* declaration.
* <pre>
* [27] Misc ::= Comment | PI | S
* </pre>
*/
void parseMisc ()
    throws java.lang.Exception
{
    // Keep consuming Misc items until something else shows up.
    for (;;) {
        skipWhitespace();

        if (tryRead("<?")) {
            parsePI();
            continue;
        }
        if (tryRead("<!--")) {
            parseComment();
            continue;
        }

        // Neither a PI nor a comment: leave the input for the caller.
        return;
    }
}
/**
* Parse a document type declaration.
* <pre>
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
* ('[' %markupdecl* ']' S?)? '>'
* </pre>
* <p>(The <code><!DOCTYPE</code> has already been read.)
*/
void parseDoctypedecl ()
    throws java.lang.Exception
{
    String doctypeName;
    String[] ids;

    // Read the document type name.
    requireWhitespace();
    doctypeName = readNmtoken(true);

    // Read the ExternalIDs.
    skipWhitespace();
    ids = readExternalIds(false);

    // Look for a declaration subset; it runs until the closing ']'.
    skipWhitespace();
    if (tryRead('[')) {
        parseMarkupdeclsUntil(']');
    }

    // Read the external subset, if any.
    if (ids[1] != null) {
        pushURL("[external subset]", ids[0], ids[1], null, null, null);
        // Loop until we end up back at the '>' that closes the
        // DOCTYPE declaration.
        parseMarkupdeclsUntil('>');
    } else {
        // No external subset.
        skipWhitespace();
        require('>');
    }

    if (handler != null) {
        handler.doctypeDecl(doctypeName, ids[0], ids[1]);
    }

    // Expand general entities in
    // default values of attributes.
    // (Do this after the doctypeDecl
    // event!).
    // expandAttributeDefaultValues();
}

/**
 * Parse markup declarations until the given delimiter is read.
 * <p>The context is CONTEXT_DTD while skipping whitespace and while
 * parsing each declaration, and CONTEXT_NONE while testing for the
 * delimiter and on normal return.
 * @param terminator The character that ends the run of declarations;
 *        it is consumed.
 */
private void parseMarkupdeclsUntil (char terminator)
    throws java.lang.Exception
{
    while (true) {
        context = CONTEXT_DTD;
        skipWhitespace();
        context = CONTEXT_NONE;
        if (tryRead(terminator)) {
            return;
        }
        context = CONTEXT_DTD;
        parseMarkupdecl();
        context = CONTEXT_NONE;
    }
}
/**
* Parse a markup declaration in the internal or external DTD subset.
* <pre>
* [29] markupdecl ::= ( %elementdecl | %AttlistDecl | %EntityDecl |
* %NotationDecl | %PI | %S | %Comment |
* InternalPERef )
* [30] InternalPERef ::= PEReference
* [31] extSubset ::= (%markupdecl | %conditionalSect)*
* </pre>
*/
void parseMarkupdecl ()
    throws java.lang.Exception
{
    // Probe for each declaration opener in turn; the first one that
    // matches selects the parser and ends the dispatch.
    if (tryRead("<!ELEMENT")) {
        parseElementdecl();
        return;
    }
    if (tryRead("<!ATTLIST")) {
        parseAttlistDecl();
        return;
    }
    if (tryRead("<!ENTITY")) {
        parseEntityDecl();
        return;
    }
    if (tryRead("<!NOTATION")) {
        parseNotationDecl();
        return;
    }
    if (tryRead("<?")) {
        parsePI();
        return;
    }
    if (tryRead("<!--")) {
        parseComment();
        return;
    }
    if (tryRead("<![")) {
        parseConditionalSect();
        return;
    }

    // Nothing recognisable at this position.
    error("expected markup declaration", null, null);
}
/**
* Parse an element, with its tags.
* <pre>
* [33] STag ::= '<' Name (S Attribute)* S? '>' [WFC: unique Att spec]
* [38] element ::= EmptyElement | STag content ETag
* [39] EmptyElement ::= '<' Name (S Attribute)* S? '/>'
* [WFC: unique Att spec]
* </pre>
* <p>(The '<' has already been read.)
* <p>NOTE: this method actually chains onto parseContent(), if necessary,
* and parseContent() will take care of calling parseETag().
*/
void parseElement ()
    throws java.lang.Exception
{
    String gi;
    char c;

    // Remember the enclosing element's state so that it can be
    // restored once this element has been handled.
    int oldElementContent = currentElementContent;
    String oldElement = currentElement;

    // This is the (global) counter for the
    // array of specified attributes.
    tagAttributePos = 0;

    // Read the element type name.
    gi = readNmtoken(true);

    // Determine the current content type; undeclared elements are
    // treated as CONTENT_ANY.
    currentElement = gi;
    currentElementContent = getElementContentType(gi);
    if (currentElementContent == CONTENT_UNDECLARED) {
        currentElementContent = CONTENT_ANY;
    }

    // Read the attributes, if any.
    // After this loop, we should be just
    // in front of the closing delimiter.
    skipWhitespace();
    c = readCh();
    while (c != '/' && c != '>') {
        unread(c);
        parseAttribute(gi);
        skipWhitespace();
        c = readCh();
    }
    unread(c);

    // Supply any defaulted attributes.
    Enumeration atts = declaredAttributes(gi);
    if (atts != null) {
        String aname;
        loop: while (atts.hasMoreElements()) {
            aname = (String)atts.nextElement();
            // See if it was specified. Compare by content, not by
            // reference: identity only works when both strings are
            // the same interned instance, and a mismatch would report
            // a specified attribute a second time as defaulted.
            for (int i = 0; i < tagAttributePos; i++) {
                if (aname.equals(tagAttributes[i])) {
                    continue loop;
                }
            }
            // Not specified in the tag: report the declared value,
            // flagged as not specified.
            if (handler != null) {
                handler.attribute(aname,
                                  getAttributeExpandedValue(gi, aname),
                                  false);
            }
        }
    }

    // Figure out if this is a start tag
    // or an empty element, and dispatch an
    // event accordingly.
    c = readCh();
    switch (c) {
    case '>':
        if (handler != null) {
            handler.startElement(gi);
        }
        parseContent();
        break;
    case '/':
        require('>');
        if (handler != null) {
            handler.startElement(gi);
            handler.endElement(gi);
        }
        break;
    }

    // Restore the previous state.
    currentElement = oldElement;
    currentElementContent = oldElementContent;
}
/**
* Parse an attribute assignment.
* <pre>
* [34] Attribute ::= Name Eq AttValue
* </pre>
* @param name The name of the attribute's element.
* @see XmlHandler#attribute
*/
void parseAttribute (String name)
    throws java.lang.Exception
{
    // Attribute name, interned, and its declared type for this element.
    String attributeName = readNmtoken(true).intern();
    int declaredType = getAttributeDefaultValueType(name, attributeName);

    // The '=' between name and value.
    parseEq();

    // Character and entity references are always expanded in the
    // value; whitespace normalisation is added for every type other
    // than CDATA or undeclared.
    int literalFlags = LIT_CHAR_REF | LIT_ENTITY_REF;
    boolean cdataLike = (declaredType == ATTRIBUTE_CDATA
                         || declaredType == ATTRIBUTE_UNDECLARED);
    if (!cdataLike) {
        literalFlags |= LIT_NORMALIZE;
    }
    String attributeValue = readLiteral(literalFlags);

    // Report the attribute as explicitly specified.
    if (handler != null) {
        handler.attribute(attributeName, attributeValue, true);
    }
    dataBufferPos = 0;

    // Record the name among the attributes specified in this tag,
    // doubling the array first when it is full.
    if (tagAttributePos == tagAttributes.length) {
        String[] grown = new String[tagAttributes.length * 2];
        System.arraycopy(tagAttributes, 0, grown, 0, tagAttributePos);
        tagAttributes = grown;
    }
    tagAttributes[tagAttributePos++] = attributeName;
}
/**
* Parse an equals sign surrounded by optional whitespace.
* [35] Eq ::= S? '=' S?
*/
void parseEq ()
    throws java.lang.Exception
{
    final char equalsSign = '=';

    // Optional whitespace, the mandatory '=', optional whitespace.
    skipWhitespace();
    require(equalsSign);
    skipWhitespace();
}
/**
* Parse an end tag.
* [36] ETag ::= '</' Name S? '>'
* *NOTE: parseContent() chains to here.
*/
void parseETag ()
throws java.lang.Exception
{
String name;
name = readNmtoken(true);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -