xmlparser.java

来自「RESIN 3.2 最新源码」· Java 代码 · 共 2,651 行 · 第 1/5 页

JAVA
2,651
字号
/* * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT.  See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the *   Free SoftwareFoundation, Inc. *   59 Temple Place, Suite 330 *   Boston, MA 02111-1307  USA * * @author Scott Ferguson */package com.caucho.xml;import com.caucho.util.CharBuffer;import com.caucho.vfs.Path;import com.caucho.vfs.ReadStream;import com.caucho.vfs.ReaderWriterStream;import com.caucho.vfs.Vfs;import com.caucho.vfs.WriteStream;import com.caucho.xml.readers.MacroReader;import com.caucho.xml.readers.Utf16Reader;import com.caucho.xml.readers.Utf8Reader;import com.caucho.xml.readers.XmlReader;import org.w3c.dom.Document;import org.w3c.dom.Node;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.Arrays;import java.util.logging.Level;/** * A configurable XML parser.  Loose versions of XML and HTML are supported * by changing the Policy object. * * <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml. */public class XmlParser extends AbstractParser {  // Xerces uses the following  public static final String XMLNS = "http://www.w3.org/2000/xmlns/";  public static final String XML = "http://www.w3.org/XML/1998/namespace";  static final QName DOC_NAME = new QName(null, "#document", null);  static final QName TEXT_NAME = new QName(null, "#text", null);  static final QName JSP_NAME = new QName(null, "#jsp", null);  static final QName WHITESPACE_NAME = new QName(null, "#whitespace", null);  static final QName JSP_ATTRIBUTE_NAME = new QName("xtp", "jsp-attribute", null);    QAttributes _attributes;  QAttributes _nullAttributes;  boolean _inDtd;    CharBuffer _text;  CharBuffer _eltName;  CharBuffer _cb;  CharBuffer _buf = new CharBuffer();  String _textFilename;  int _textLine;  char []_textBuffer = new char[1024];  int _textLength;  int _textCapacity = _textBuffer.length;  boolean _isIgnorableWhitespace;  boolean _isJspText;    CharBuffer _name = new CharBuffer();  CharBuffer _nameBuffer = new CharBuffer();    MacroReader _macro = new MacroReader();  int _macroIndex = 0;  int _macroLength = 0;  char []_macroBuffer;  QName []_elementNames = new QName[64];  NamespaceMap []_namespaces = new NamespaceMap[64];  int []_elementLines = new int[64];  int _elementTop;  NamespaceMap _namespaceMap;  ArrayList<String> _attrNames = new ArrayList<String>();  ArrayList<String> _attrValues = new ArrayList<String>();  ReadStream _is;  XmlReader _reader;    String _extPublicId;  String _extSystemId;    QName _activeNode;  QName _topNamespaceNode;  boolean _isTagStart;  boolean _stopOnIncludeEnd;  boolean _hasTopElement;  boolean _hasDoctype;  boolean _isHtml;  Locator _locator = new LocatorImpl(this);  public XmlParser()  {    clear();  }  /**   * Creates a new parser with a given parsing policy and dtd.   *   * @param policy the parsing policy, handling optional tags.   * @param dtd the parser's dtd.   */  XmlParser(Policy policy, QDocumentType dtd)  {    super(policy, dtd);        clear();  }  /**   * Initialize the parser.   */  void init()  {    super.init();        _attributes = new QAttributes();    _nullAttributes = new QAttributes();    _eltName = new CharBuffer();    _text = new CharBuffer();    _isHtml = _policy instanceof HtmlPolicy;    // jsp/193b    // _namespaceMap = null;        _textLength = 0;    _isIgnorableWhitespace = true;    _elementTop = 0;    _elementLines[0] = 1;    _line = 1;    _dtd = null;    _inDtd = false;    _isTagStart = false;    _stopOnIncludeEnd = false;    _extPublicId = null;    _extSystemId = null;    // _filename = null;    _publicId = null;    _systemId = null;        _hasTopElement = false;    _hasDoctype = false;    _macroIndex = 0;    _macroLength = 0;    _reader = null;    // _owner = null;        _policy.init();  }  /**   * Parse the document from a read stream.   *   * @param is read stream to parse from.   *   * @return The parsed document.   */  Document parseInt(ReadStream is)    throws IOException, SAXException  {    _is = is;    if (_filename == null && _systemId != null)      _filename = _systemId;    else if (_filename == null)      _filename = _is.getUserPath();    if (_systemId == null) {      _systemId = _is.getPath().getURL();      if ("null:".equals(_systemId) || "string:".equals(_systemId))	_systemId = "stream";    }    /* xsl/0401    if (_isNamespaceAware)      _namespaceMap = new NamespaceMap(null, "", "");    */    _policy.setNamespaceAware(_isNamespaceAware);        if (_filename == null)      _filename = _systemId;    if (_filename == null)      _filename = "stream";    if (_dtd != null)      _dtd.setSystemId(_systemId);        if (_builder != null) {      if (! "string:".equals(_systemId) && ! "stream".equals(_systemId))	_builder.setSystemId(_systemId);      _builder.setFilename(_is.getPath().getURL());    }    if (_contentHandler == null)      _contentHandler = new org.xml.sax.helpers.DefaultHandler();    _contentHandler.setDocumentLocator(_locator);    if (_owner == null)      _owner = new QDocument();    if (_defaultEncoding != null)      _owner.setAttribute("encoding", _defaultEncoding);    _owner.addDepend(is.getPath());        _activeNode = DOC_NAME;        _policy.setStream(is);    _policy.setNamespace(_namespaceMap);    _contentHandler.startDocument();        int ch = parseXMLDeclaration(null);        ch = skipWhitespace(ch);    parseNode(ch, false);    /*    if (dbg.canWrite()) {      printDebugNode(dbg, doc, 0);      dbg.flush();    }    */    if (_strictXml && ! _hasTopElement)      throw error(L.l("XML file has no top-element.  All well-formed XML files have a single top-level element."));    if (_contentHandler != null)      _contentHandler.endDocument();    QDocument owner = _owner;    _owner = null;          return owner;  }  /**   * The main dispatch loop.   *   * @param node the current node   * @param ch the next character   * @param special true for the short form, &lt;foo/bar/>   */  private void parseNode(int ch, boolean special)    throws IOException, SAXException  {    //boolean isTop = node instanceof QDocument;    _text.clear();  loop:    while (true) {      if (_textLength == 0) {        _textFilename = getFilename();        _textLine = getLine();      }      switch (ch) {      case -1:	if (_textLength != 0)	  appendText();        if (! _stopOnIncludeEnd && _reader.getNext() != null) {          popInclude();          if (_reader != null)            parseNode(_reader.read(), special);          return;        }	closeTag("");	return;      case ' ': case '\t': case '\n': case '\r':	if (! _normalizeWhitespace)	  addText((char) ch);	else if (_textLength == 0) {	  if (! _isTagStart)	    addText(' ');	}	else if (_textBuffer[_textLength - 1] != ' ') {	  addText(' ');	}	ch = _reader.read();	break;      case 0xffff:	// marker for end of text for serialization	return;      default:	addText((char) ch);	ch = _reader.read();	break;      case '/':	if (! special) {	  addText((char) ch);	  ch = _reader.read();	  continue;	}	ch = _reader.read();	if (ch == '>' || ch == -1) {          appendText();          popNode();	  return;	}	addText('/');	break;      case '&':        ch = parseEntityReference();	break;      case '<':	boolean endTag = false;	ch = _reader.read();	if (ch == '/' && ! special) {	  if (_normalizeWhitespace &&	      _textLength > 0 && _textBuffer[_textLength - 1] == ' ') {	    _textLength--;	  }	  appendText();	  ch = _reader.parseName(_name, _reader.read());          if (ch != '>') {            // XXX: Hack for Java PetStore            while (XmlChar.isWhitespace(ch))              ch = _reader.read();            if (ch != '>')              throw error(L.l("`</{0}>' expected `>' at {1}.  Closing tags must close immediately after the tag name.", _name, badChar(ch)));          }	  closeTag(_policy.getName(_name).getName());	  ch = _reader.read();	} 	// element: <tag attr=value ... attr=value> ...	else if (XmlChar.isNameStart(ch)) {	  appendText();	  	  parseElement(ch);	  ch = _reader.read();	}	// <! ...	else if (ch == '!') {	  // <![CDATA[ ... ]]>	  if ((ch = _reader.read()) == '[') {	    parseCdata();	    ch = _reader.read();	  }	  // <!-- ... -->          else if (ch == '-') {	    parseComment();	    ch = _reader.read();	  } 	  else if (XmlChar.isNameStart(ch)) {	    appendText();	    ch = _reader.parseName(_name, ch);	    String declName = _name.toString();	    if (declName.equals("DOCTYPE")) {	      parseDoctype(ch);              if (_contentHandler instanceof DOMBuilder)                ((DOMBuilder) _contentHandler).dtd(_dtd);	      ch = _reader.read();	    } else if (_forgiving && declName.equalsIgnoreCase("doctype")) {	      parseDoctype(ch);              if (_contentHandler instanceof DOMBuilder)                ((DOMBuilder) _contentHandler).dtd(_dtd);              	      ch = _reader.read();	    } else	      throw error(L.l("expected `<!DOCTYPE' declaration at {0}", declName));	  } else if (_forgiving) {	    addText("<!");	  } else	    throw error(L.l("expected `<!DOCTYPE' declaration at {0}", badChar(ch)));	} 	// PI: <?tag attr=value ... attr=value?>	else if (ch == '?') {	  ch = parsePI();	} 	else if (_strictXml) {	  throw error(L.l("expected tag name after `<' at {0}.  Open tag names must immediately follow the open brace like `<foo ...>'", badChar(ch)));        }        // implicit <![CDATA[ for <% ... %>        else if (_isJsp && ch == '%') {          ch = _reader.read();          appendText();          _isJspText = ch != '=';                    addText("<%");          while (ch >= 0) {            if (ch == '%') {              ch = _reader.read();              if (ch == '>') {                addText("%>");                ch = _reader.read();                break;              }              else                addText('%');            }            else {              addText((char) ch);              ch = _reader.read();            }          }          appendText();          _isJspText = false;	}	else {	  addText('<');	}      }    }  }  /**   * Parses the &lt;!DOCTYPE> declaration.   */  private void parseDoctype(int ch)    throws IOException, SAXException  {    if (_activeNode != DOC_NAME)      throw error(L.l("<!DOCTYPE immediately follow the <?xml ...?> declaration."));        _inDtd = true;    ch = skipWhitespace(ch);    ch = _reader.parseName(_nameBuffer, ch);    String name = _nameBuffer.toString();    ch = skipWhitespace(ch);    if (_dtd == null)      _dtd = new QDocumentType(name);    _dtd.setName(name);    if (XmlChar.isNameStart(ch)) {      ch = parseExternalID(ch);      ch = skipWhitespace(ch);      _dtd._publicId = _extPublicId;      _dtd._systemId = _extSystemId;    }    if (_dtd._systemId != null && ! _dtd._systemId.equals("")) {      InputStream is = null;      unread(ch);            XmlReader oldReader = _reader;      boolean hasInclude = false;      try {        pushInclude(_extPublicId, _extSystemId);        hasInclude = true;      } catch (Exception e) {	if (log.isLoggable(Level.FINEST))	  log.log(Level.FINER, e.toString(), e);	else	  log.finer(e.toString());      }      if (hasInclude) {        _stopOnIncludeEnd = true;	try {	  ch = parseDoctypeDecl(_dtd);	} catch (XmlParseException e) {	  if (_extSystemId != null &&	      _extSystemId.startsWith("http")) {	    log.log(Level.FINE, e.toString(), e);	  }	  else	    throw e;	}        _stopOnIncludeEnd = false;        while (_reader != null && _reader != oldReader)          popInclude();      }      if (_reader != null)        ch = skipWhitespace(read());    }    

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?