📄 parser.java
字号:
/* * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version * 2 only, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is * included at /legal/license.txt). * * You should have received a copy of the GNU General Public License * version 2 along with this work; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa * Clara, CA 95054 or visit www.sun.com if you need additional * information or have any questions. *//* * @(#)Parser.java 1.14 05/10/20 */package com.sun.ukit.jaxp;import java.util.Hashtable;import java.io.InputStream;import java.io.Reader;import java.io.InputStreamReader;import java.io.IOException;import java.io.UnsupportedEncodingException;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.Locator;import org.xml.sax.InputSource;import org.xml.sax.Attributes;import org.xml.sax.SAXParseException;import org.xml.sax.SAXException;import javax.xml.parsers.SAXParser;/** * XML non-validating parser. * * This non-validating parser conforms to <a href="http://www.w3.org/TR/REC-xml" * >Extensible Markup Language (XML) 1.0</a> and <a href="http://www.w3.org/TR/REC-xml-names" * >"Namespaces in XML"</a> specifications. * The API used by the parser is <a href="http://www.jcp.org/en/jsr/detail?id=172" * >JSR-172</a> subset of <a href="http://java.sun.com/xml/jaxp/index.html">JAXP</a> * and <a href="http://www.saxproject.org/">SAX2</a>. * * @see org.xml.sax.helpers.DefaultHandler */public final class Parser extends SAXParser implements Locator{ public final static String FAULT = ""; private final static short BUFFSIZE_READER = 512; private final static short BUFFSIZE_PARSER = 128; private final static short BUFFSIZE_ENTITY = 32; /** The end of stream character. */ public final static char EOS = 0xffff; private Pair mNoNS; // there is no namespace private Pair mXml; // the xml namespace private DefaultHandler mHand; // a document handler private Hashtable mEnt; // the entities look up table private Hashtable mPEnt; // the parmeter entities look up table private boolean mIsNSAware; // if true - to report QName private short mSt; // global state of the parser // mSt values: // - 0 : the begining of the document // - 1 : misc before DTD // - 2 : DTD // - 3 : misc after DTD // - 4 : document's element // - 5 : misc after document's element private char mESt; // built-in entity recognizer state // mESt values: // 0x100 : the initial state // > 0x100 : unrecognized name // < 0x100 : replacement character private char[] mBuff; // parser buffer private short mBuffIdx; // index of the last char private Pair mPref; // stack of prefixes private Pair mElm; // stack of elements private Pair mAttL; // list of defined attributes by element name private Input mInp; // stack of entities private Input mDoc; // document entity private char[] mChars; // reading buffer private char mChLen; // current capacity private char mChIdx; // index to the next char private Attrs mAttrs; // attributes of the curr. element private String[] mItems; // attributes array of the curr. element private char mAttrIdx; // attributes counter/index private Pair mDltd; // deleted objects for reuse /** * Default prefixes */ private final static char NONS[]; private final static char XML[]; private final static char XMLNS[]; static { NONS = new char[1]; NONS[0] = (char)0; XML = new char[4]; XML[0] = (char)4; XML[1] = 'x'; XML[2] = 'm'; XML[3] = 'l'; XMLNS = new char[6]; XMLNS[0] = (char)6; XMLNS[1] = 'x'; XMLNS[2] = 'm'; XMLNS[3] = 'l'; XMLNS[4] = 'n'; XMLNS[5] = 's'; } /** * ASCII character type array. * * This array maps an ASCII (7 bit) character to the character type.<br /> * Possible character type values are:<br /> * - ' ' for any kind of white space character;<br /> * - 'a' for any lower case alphabetical character value;<br /> * - 'A' for any upper case alphabetical character value;<br /> * - 'd' for any decimal digit character value;<br /> * - 'z' for any character less then ' ' except '\t', '\n', '\r';<br /> * An ASCII (7 bit) character which does not fall in any category listed * above is mapped to it self. */ private static final byte asctyp[]; /** * Static constructor. * * Sets up the ASCII character type array which is used by * {@link #asctyp asctyp} method. */ static { asctyp = new byte[0x80]; short i = 0; while (i <= 0x1f) asctyp[i++] = (byte)'z'; asctyp['\t'] = (byte)' '; asctyp['\r'] = (byte)' '; asctyp['\n'] = (byte)' '; for (;i <= 0x2f; i++) asctyp[i] = (byte)i; while (i <= 0x39) asctyp[i++] = (byte)'d'; for (;i <= 0x40; i++) asctyp[i] = (byte)i; while (i <= 0x5a) asctyp[i++] = (byte)'A'; for (;i <= 0x60; i++) asctyp[i] = (byte)i; while (i <= 0x7a) asctyp[i++] = (byte)'a'; for (;i <= 0x7f; i++) asctyp[i] = (byte)i; } /** * Constructor. */ public Parser(boolean nsaware) { super(); mIsNSAware = nsaware; // Initialize the parser mBuff = new char[BUFFSIZE_PARSER]; mAttrs = new Attrs(); // Default namespace mPref = pair(mPref); mPref.name = ""; mPref.value = ""; mPref.chars = NONS; mNoNS = mPref; // no namespace // XML namespace mPref = pair(mPref); mPref.name = "xml"; mPref.value = "http://www.w3.org/XML/1998/namespace"; mPref.chars = XML; mXml = mPref; // XML namespace } /** * Return the public identifier for the current document event. * * <p>The return value is the public identifier of the document * entity or of the external parsed entity in which the markup * triggering the event appears.</p> * * @return A string containing the public identifier, or * null if none is available. * * @see #getSystemId */ public String getPublicId() { return (mInp != null)? mInp.pubid: null; } /** * Return the system identifier for the current document event. * * <p>The return value is the system identifier of the document * entity or of the external parsed entity in which the markup * triggering the event appears.</p> * * <p>If the system identifier is a URL, the parser must resolve it * fully before passing it to the application.</p> * * @return A string containing the system identifier, or null * if none is available. * * @see #getPublicId */ public String getSystemId() { return (mInp != null)? mInp.sysid: null; } /** * Return the line number where the current document event ends. * * @return Always returns -1 indicating the line number is not * available. * * @see #getColumnNumber */ public int getLineNumber() { return -1; } /** * Return the column number where the current document event ends. * * @return Always returns -1 indicating the column number is not * available. * * @see #getLineNumber */ public int getColumnNumber() { return -1; } /** * Indicates whether or not this parser is configured to * understand namespaces. * * @return true if this parser is configured to * understand namespaces; false otherwise. */ public boolean isNamespaceAware() { return mIsNSAware; } /** * Indicates whether or not this parser is configured to validate * XML documents. * * @return true if this parser is configured to validate XML * documents; false otherwise. */ public boolean isValidating() { return false; } /** * Parse the content of the given {@link java.io.InputStream} * instance as XML using the specified * {@link org.xml.sax.helpers.DefaultHandler}. * * @param src InputStream containing the content to be parsed. * @param handler The SAX DefaultHandler to use. * @exception IOException If any IO errors occur. * @exception IllegalArgumentException If the given InputStream or handler is null. * @exception SAXException If the underlying parser throws a * SAXException while parsing. * @see org.xml.sax.helpers.DefaultHandler */ public void parse(InputStream src, DefaultHandler handler) throws SAXException, IOException { if ((src == null) || (handler == null)) throw new IllegalArgumentException(""); parse(new InputSource(src), handler); } /** * Parse the content given {@link org.xml.sax.InputSource} * as XML using the specified * {@link org.xml.sax.helpers.DefaultHandler}. * * @param is The InputSource containing the content to be parsed. * @param handler The SAX DefaultHandler to use. * @exception IOException If any IO errors occur. * @exception IllegalArgumentException If the InputSource or handler is null. * @exception SAXException If the underlying parser throws a * SAXException while parsing. * @see org.xml.sax.helpers.DefaultHandler */ public void parse(InputSource is, DefaultHandler handler) throws SAXException, IOException { if ((is == null) || (handler == null)) throw new IllegalArgumentException(""); // Set up the handler mHand = handler; // Set up the document mInp = new Input(BUFFSIZE_READER); setinp(is); parse(handler); } /** * Parse the XML document content using the specified * {@link org.xml.sax.helpers.DefaultHandler}. * * @param handler The SAX DefaultHandler to use. * @exception IOException If any IO errors occur. * @exception SAXException If the underlying parser throws a * SAXException while parsing. * @see org.xml.sax.helpers.DefaultHandler */ private void parse(DefaultHandler handler) throws SAXException, IOException { try { // Initialize the parser mPEnt = new Hashtable(); mEnt = new Hashtable(); mDoc = mInp; // current input is document entity mChars = mInp.chars; // use document entity buffer // Parse an xml document char ch; mHand.setDocumentLocator(this); mHand.startDocument(); mSt = 1; while ((ch = next()) != EOS) { switch (chtyp(ch)) { case '<': ch = next(); switch (ch) { case '?': pi(); break; case '!': ch = next(); back(); if (ch == '-') comm(); else dtd(); break; default: // must be the first char of an xml name if (mSt == 5) // misc after document's element panic(FAULT); // Document's element. back(); mSt = 4; // document's element elm(); mSt = 5; // misc after document's element break; } break; case ' ': // Skip white spaces break; default: panic(FAULT); } } if (mSt != 5) // misc after document's element panic(FAULT); } finally { mHand.endDocument(); while (mAttL != null) { while (mAttL.list != null) { if (mAttL.list.list != null) del(mAttL.list.list); mAttL.list = del(mAttL.list); } mAttL = del(mAttL); } while (mElm != null) mElm = del(mElm); while (mPref != mXml) mPref = del(mPref); while (mInp != null) pop(); if ((mDoc != null) && (mDoc.src != null)) { try { mDoc.src.close(); } catch (IOException ioe) {} } mPEnt = null; mEnt = null; mDoc = null; mHand = null; } } /** * Parses the document type declaration. * * @exception SAXException * @exception IOException */ private void dtd() throws SAXException, IOException { char ch; String str = null; String name = null; Pair psid = null; // read 'DOCTYPE' if ("DOCTYPE".equals(name(false)) != true) panic(FAULT); mSt = 2; // DTD for (short st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // read the document type name if (chtyp(ch) != ' ') { back(); name = name(mIsNSAware); wsskip(); st = 1; // read 'PUPLIC' or 'SYSTEM' } break; case 1: // read 'PUPLIC' or 'SYSTEM' switch (chtyp(ch)) { case 'A': back(); psid = pubsys(' '); st = 2; // skip spaces before internal subset break; case '[': back(); st = 2; // skip spaces before internal subset break; case '>': back(); st = 3; // skip spaces after internal subset break; default: panic(FAULT); } if (psid != null) { // DTD external subset // The parser reports DTD external subset by resolveEntity, but // does not expect to get back an InputSource object. There are // two reasons: // 1. the parser does not support validation thus it may ignore // the DTD external subset. // 2. the parser does not support schemas if (mHand.resolveEntity(psid.name, psid.value) != null) panic(FAULT); del(psid); // Unresolved DTD external subset mHand.skippedEntity("[dtd]"); } break; case 2: // skip spaces before internal subset switch (chtyp(ch)) { case '[': // Process internal subset dtdint(); st = 3; // skip spaces after internal subset break; case '>': back(); st = 3; // skip spaces after internal subset break; case ' ': // skip white spaces break; default: panic(FAULT); } break; case 3: // skip spaces after internal subset switch (chtyp(ch)) { case '>': st = -1; // end of DTD break; case ' ': // skip white spaces break; default: panic(FAULT); } break; default: panic(FAULT); } } mSt = 3; // misc after DTD } /** * Parses the document type declaration internal subset. * * @exception SAXException * @exception IOException */ private void dtdint() throws SAXException, IOException { char ch; for (short st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // skip white spaces before a declaration switch (chtyp(ch)) { case '<': ch = next(); switch (ch) { case '?': pi(); break; case '!': ch = next(); back(); if (ch == '-') { comm(); break; } // markup or entity declaration bntok(); switch (bkeyword()) { case 'n': dtdent(); break; case 'a': dtdattl(); // parse attributes declaration break; case 'e': dtdelm(); // parse element declaration
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -