📄 xmlparser.java

📁 Java的面向对象数据库系统的源代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    //////////////////////////////////////////////////////////////////////    /**     * Parse an XML document.     * <pre>     * [1] document ::= prolog element Misc*     * </pre>     * <p>This is the top-level parsing function for a single XML     * document.  As a minimum, a well-formed document must have     * a document element, and a valid document must have a prolog     * (one with doctype) as well.     */    private void parseDocument ()    throws Exception    {        char c;        try {                                       // added by MHK            parseProlog ();            require ('<');            parseElement ();        } catch (EOFException ee) {                 // added by MHK            error("premature end of file", "[EOF]", null);        }                try {            parseMisc ();   //skip all white, PIs, and comments            c = readCh ();    //if this doesn't throw an exception...            error ("unexpected characters after document end", c, null);        } catch (EOFException e) {            return;        }    }    /**     * Skip a comment.     * <pre>     * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "-->"     * </pre>     * <p> (The <code>&lt;!--</code> has already been read.)     */    private void parseComment ()    throws Exception    {    char c;    boolean saved = expandPE;    expandPE = false;    parseUntil ("--");    require ('>');    expandPE = saved;    handler.comment (dataBuffer, 0, dataBufferPos);    dataBufferPos = 0;    }    /**     * Parse a processing instruction and do a call-back.     * <pre>     * [16] PI ::= '&lt;?' PITarget     *      (S (Char* - (Char* '?&gt;' Char*)))?     *      '?&gt;'     * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') )     * </pre>     * <p> (The <code>&lt;?</code> has already been read.)     */    private void parsePI ()    throws SAXException, IOException    {    String name;    boolean saved = expandPE;    expandPE = false;    name = readNmtoken (true);    if ("xml".equalsIgnoreCase (name))        error ("Illegal processing instruction target", name, null);    if (!tryRead ("?>")) {        requireWhitespace ();        parseUntil ("?>");    }    expandPE = saved;    handler.processingInstruction (name, dataBufferToString ());    }    /**     * Parse a CDATA section.     * <pre>     * [18] CDSect ::= CDStart CData CDEnd     * [19] CDStart ::= '&lt;![CDATA['     * [20] CData ::= (Char* - (Char* ']]&gt;' Char*))     * [21] CDEnd ::= ']]&gt;'     * </pre>     * <p> (The '&lt;![CDATA[' has already been read.)     */    private void parseCDSect ()    throws Exception    {    parseUntil ("]]>");    dataBufferFlush ();    }    /**     * Parse the prolog of an XML document.     * <pre>     * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?     * </pre>     * <p>There are a couple of tricks here.  First, it is necessary to     * declare the XML default attributes after the DTD (if present)     * has been read. [??]  Second, it is not possible to expand general     * references in attribute value literals until after the entire     * DTD (if present) has been parsed.     * <p>We do not look for the XML declaration here, because it was     * handled by pushURL ().     * @see #pushURL     */    private void parseProlog ()    throws Exception    {    parseMisc ();    if (tryRead ("<!DOCTYPE")) {        parseDoctypedecl ();        parseMisc ();    }    }    /**     * Parse the XML declaration.     * <pre>     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'     * [24] VersionInfo ::= S 'version' Eq     *      ("'" VersionNum "'" | '"' VersionNum '"' )     * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*     * [32] SDDecl ::= S 'standalone' Eq     *      ( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' )     * [80] EncodingDecl ::= S 'encoding' Eq     *      ( "'" EncName "'" | "'" EncName "'" )     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*     * </pre>     * <p> (The <code>&lt;?xml</code> and whitespace have already been read.)     * @return the encoding in the declaration, uppercased; or null     * @see #parseTextDecl     * @see #setupDecoding     */    private String parseXMLDecl (boolean ignoreEncoding)    throws SAXException, IOException    {    String  version;    String  encodingName = null;    String  standalone = null;    boolean white;    int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;    // Read the version.    require ("version");    parseEq ();    version = readLiteral (flags);    if (!version.equals ("1.0")) {        error ("unsupported XML version", version, "1.0");    }    // Try reading an encoding declaration.    white = tryWhitespace ();    if (tryRead ("encoding")) {        if (!white)        error ("whitespace required before 'encoding='");        parseEq ();        encodingName = readLiteral (flags);        if (!ignoreEncoding)        setupDecoding (encodingName);    }    // Try reading a standalone declaration    if (encodingName != null)        white = tryWhitespace ();    if (tryRead ("standalone")) {        if (!white)        error ("whitespace required before 'standalone='");        parseEq ();        standalone = readLiteral (flags);        if (! ("yes".equals (standalone) || "no".equals (standalone)))        error ("standalone flag must be 'yes' or 'no'");    }    skipWhitespace ();    require ("?>");    return encodingName;    }    /**     * Parse a text declaration.     * <pre>     * [79] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'     * [80] EncodingDecl ::= S 'encoding' Eq     *      ( '"' EncName '"' | "'" EncName "'" )     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*     * </pre>     * <p> (The <code>&lt;?xml</code>' and whitespace have already been read.)     * @return the encoding in the declaration, uppercased; or null     * @see #parseXMLDecl     * @see #setupDecoding     */    private String parseTextDecl (boolean ignoreEncoding)    throws SAXException, IOException    {    String  encodingName = null;    int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;    // Read an optional version.    if (tryRead ("version")) {        String version;        parseEq ();        version = readLiteral (flags);        if (!version.equals ("1.0")) {        error ("unsupported XML version", version, "1.0");        }        requireWhitespace ();    }    // Read the encoding.    require ("encoding");    parseEq ();    encodingName = readLiteral (flags);    if (!ignoreEncoding)        setupDecoding (encodingName);    skipWhitespace ();    require ("?>");    return encodingName;    }    /**     * Sets up internal state so that we can decode an entity using the     * specified encoding.  This is used when we start to read an entity     * and we have been given knowledge of its encoding before we start to     * read any data (e.g. from a SAX input source or from a MIME type).     *     * <p> It is also used after autodetection, at which point only very     * limited adjustments to the encoding may be used (switching between     * related builtin decoders).     *     * @param encodingName The name of the encoding specified by the user.     * @exception IOException if the encoding isn't supported either     *  internally to this parser, or by the hosting JVM.     * @see #parseXMLDecl     * @see #parseTextDecl     */    private void setupDecoding (String encodingName)    throws SAXException, IOException    {    encodingName = encodingName.toUpperCase ();    // ENCODING_EXTERNAL indicates an encoding that wasn't    // autodetected ... we can use builtin decoders, or    // ones from the JVM (InputStreamReader).    // Otherwise we can only tweak what was autodetected, and    // only for single byte (ASCII derived) builtin encodings.    // ASCII-derived encodings    if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) {        if (encodingName.equals ("ISO-8859-1")            || encodingName.equals ("8859_1")            || encodingName.equals ("ISO8859_1")          ) {        encoding = ENCODING_ISO_8859_1;        return;        } else if (encodingName.equals ("US-ASCII")            || encodingName.equals ("ASCII")) {        encoding = ENCODING_ASCII;        return;        } else if (encodingName.equals ("UTF-8")            || encodingName.equals ("UTF8")) {        encoding = ENCODING_UTF_8;        return;        } else if (encoding != ENCODING_EXTERNAL) {        // fatal error        error ("unsupported ASCII-derived encoding",               encodingName,               "UTF-8, US-ASCII, or ISO-8859-1");        }        // else fallthrough ...        // it's ASCII-ish and something other than a builtin    }    // Unicode and such    if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) {        if (!(encodingName.equals ("ISO-10646-UCS-2")            || encodingName.equals ("UTF-16")            || encodingName.equals ("UTF-16BE")            || encodingName.equals ("UTF-16LE")))        error ("unsupported Unicode encoding",               encodingName,               "UTF-16");        return;    }    // four byte encodings    if (encoding == ENCODING_UCS_4_1234        || encoding == ENCODING_UCS_4_4321        || encoding == ENCODING_UCS_4_2143        || encoding == ENCODING_UCS_4_3412) {        if (!encodingName.equals ("ISO-10646-UCS-4"))        error ("unsupported 32-bit encoding",               encodingName,               "ISO-10646-UCS-4");        return;    }    // assert encoding == ENCODING_EXTERNAL    // if (encoding != ENCODING_EXTERNAL)    //     throw new RuntimeException ("encoding = " + encoding);    if (encodingName.equals ("UTF-16BE")) {        encoding = ENCODING_UCS_2_12;        return;    }    if (encodingName.equals ("UTF-16LE")) {        encoding = ENCODING_UCS_2_21;        return;    }    // We couldn't use the builtin decoders at all.  But we can try to    // create a reader, since we haven't messed up buffering.  Tweak    // the encoding name if necessary.    if (encodingName.equals ("UTF-16")        || encodingName.equals ("ISO-10646-UCS-2"))        encodingName = "Unicode";    // Ignoring all the EBCDIC aliases here    reader = new InputStreamReader (is, encodingName);    sourceType = INPUT_READER;    is = null;    }    /**     * Parse miscellaneous markup outside the document element and DOCTYPE     * declaration.     * <pre>     * [27] Misc ::= Comment | PI | S     * </pre>     */    private void parseMisc ()    throws Exception    {    while (true) {        skipWhitespace ();        if (tryRead ("<?")) {        parsePI ();        } else if (tryRead ("<!--")) {        parseComment ();        } else {        return;        }    }    }    /**     * Parse a document type declaration.     * <pre>     * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?     *      ('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'     * </pre>     * <p> (The <code>&lt;!DOCTYPE</code> has already been read.)     */    private void parseDoctypedecl ()    throws Exception    {    char c;    String doctypeName, ids[];    // Read the document type name.    requireWhitespace ();    doctypeName = readNmtoken (true);    // Read the External subset's IDs    skipWhitespace ();    ids = readExternalIds (false);    // report (a) declaration of name, (b) lexical info (ids)    handler.doctypeDecl (doctypeName, ids [0], ids [1]);    // Internal subset is parsed first, if present    skipWhitespace ();    if (tryRead ('[')) {        // loop until the subset ends        while (true) {        expandPE = true;        skipWhitespace ();        expandPE = false;        if (tryRead (']')) {            break;      // end of subset        } else {            // WFC, PEs in internal subset (only between decls)            peIsError = expandPE = true;            parseMarkupdecl ();            peIsError = expandPE = false;        }        }    }    // Read the external subset, if any    if (ids [1] != null) {        pushURL ("[external subset]", ids [0], ids [1], null, null, null);        // Loop until we end up back at '>'        while (true) {        expandPE = true;        skipWhitespace ();        expandPE = false;        if (tryRead ('>')) {            break;        } else {            expandPE = true;            parseMarkupdecl ();            expandPE = false;        }        }    } else {        // No external subset.        skipWhitespace ();        require ('>');    }    // done dtd    handler.endDoctype ();    expandPE = false;    }    /**     * Parse a markup declaration in the internal or external DTD subset.     * <pre>     * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl     *      | NotationDecl | PI | Comment     * [30] extSubsetDecl ::= (markupdecl | conditionalSect     *      | PEReference | S) *     * </pre>     * <p> Reading toplevel PE references is handled as a lexical issue     * by the caller, as is whitespace.     */    private void parseMarkupdecl ()    throws Exception    {    if (tryRead ("<!ELEMENT")) {        parseElementdecl ();    } else if (tryRead ("<!ATTLIST")) {        parseAttlistDecl ();    } else if (tryRead ("<!ENTITY")) {        parseEntityDecl ();    } else if (tryRead ("<!NOTATION")) {        parseNotationDecl ();    } else if (tryRead ("<?")) {        parsePI ();    } else if (tryRead ("<!--")) {        parseComment ();    } else if (tryRead ("<![")) {        if (inputStack.size () > 0)        parseConditionalSect ();
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -