xmlparser.java

来自「RESIN 3.2 最新源码」· Java 代码 · 共 1,909 行 · 第 1/3 页

JAVA
1,909
字号
/* * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT.  See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the * *   Free Software Foundation, Inc. *   59 Temple Place, Suite 330 *   Boston, MA 02111-1307  USA * * @author Scott Ferguson */package com.caucho.xml2;import com.caucho.util.CharBuffer;import com.caucho.vfs.*;import com.caucho.xml2.readers.MacroReader;import com.caucho.xml2.readers.Utf16Reader;import com.caucho.xml2.readers.Utf8Reader;import com.caucho.xml2.readers.XmlReader;import org.w3c.dom.Document;import org.w3c.dom.Node;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import javax.xml.namespace.QName;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.Arrays;import java.util.logging.Level;/** * A configurable XML parser.  Loose versions of XML and HTML are supported * by changing the Policy object. * * <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml. 
*/public class XmlParser extends AbstractParser {  // Xerces uses the following  public static final String XMLNS = "http://www.w3.org/2000/xmlns/";  public static final String XML = "http://www.w3.org/XML/1998/namespace";  static final QName DOC_NAME = new QName("#document");  static final QName TEXT_NAME = new QName("#text");  static final QName WHITESPACE_NAME = new QName("#whitespace");    private static final boolean []XML_NAME_CHAR;  QAttributes _attributes;  QAttributes _nullAttributes;  CharBuffer _text;  CharBuffer _eltName;  CharBuffer _cb;  CharBuffer _buf = new CharBuffer();  String _textFilename;  int _textLine;  TempCharBuffer _tempInputBuffer;  char []_inputBuffer;  int _inputOffset;  int _inputLength;  char []_textBuffer = new char[1024];  int _textLength;  int _textCapacity = _textBuffer.length;  boolean _isIgnorableWhitespace;    char []_valueBuffer = _textBuffer;    CharBuffer _name = new CharBuffer();  CharBuffer _nameBuffer = new CharBuffer();    MacroReader _macro = new MacroReader();  int _macroIndex = 0;  int _macroLength = 0;  char []_macroBuffer;  int []_elementLines = new int[64];  int _elementTop;  ArrayList<SaxIntern.Entry> _attrNames = new ArrayList<SaxIntern.Entry>();  ArrayList<String> _attrValues = new ArrayList<String>();  ReadStream _is;  XmlReader _reader;    String _extPublicId;  String _extSystemId;  NamespaceContextImpl _namespace = new NamespaceContextImpl();  SaxIntern _intern = new SaxIntern(_namespace);;    QName _activeNode;  QName _topNamespaceNode;  boolean _isTagStart;  boolean _stopOnIncludeEnd;  boolean _hasTopElement;  boolean _hasDoctype;  Locator _locator = new LocatorImpl(this);  public XmlParser()  {  }  /**   * Creates a new parser with a given parsing policy and dtd.   *   * @param policy the parsing policy, handling optional tags.   * @param dtd the parser's dtd.   */  XmlParser(QDocumentType dtd)  {    super(dtd);  }  /**   * Initialize the parser.   
*/  void init()  {    super.init();        _attributes = new QAttributes();    _nullAttributes = new QAttributes();    _eltName = new CharBuffer();    _text = new CharBuffer();    _textLength = 0;    _isIgnorableWhitespace = true;    _elementTop = 0;    _elementLines[0] = 1;    _line = 1;    _dtd = null;    _isTagStart = false;    _stopOnIncludeEnd = false;    _extPublicId = null;    _extSystemId = null;    _filename = null;    _publicId = null;    _systemId = null;        _hasTopElement = false;    _hasDoctype = false;    _macroIndex = 0;    _macroLength = 0;    _reader = null;    // _owner = null;  }  /**   * Parse the document from a read stream.   *   * @param is read stream to parse from.   *   * @return The parsed document.   */  Document parseInt(ReadStream is)    throws IOException, SAXException  {    _tempInputBuffer = TempCharBuffer.allocate();    _inputBuffer = _tempInputBuffer.getBuffer();    _inputLength = _inputOffset = 0;        _is = is;    if (_filename == null && _systemId != null)      _filename = _systemId;    else if (_filename == null)      _filename = _is.getUserPath();    if (_systemId == null) {      _systemId = _is.getPath().getURL();      if ("null:".equals(_systemId) || "string:".equals(_systemId))	_systemId = "stream";    }    if (_filename == null)      _filename = _systemId;    if (_filename == null)      _filename = "stream";    if (_dtd != null)      _dtd.setSystemId(_systemId);        if (_builder != null) {      if (! "string:".equals(_systemId) && ! 
"stream".equals(_systemId))	_builder.setSystemId(_systemId);      _builder.setFilename(_is.getPath().getURL());    }    if (_contentHandler == null)      _contentHandler = new org.xml.sax.helpers.DefaultHandler();    _contentHandler.setDocumentLocator(_locator);    if (_owner == null)      _owner = new QDocument();    if (_defaultEncoding != null)      _owner.setAttribute("encoding", _defaultEncoding);    _owner.addDepend(is.getPath());        _activeNode = DOC_NAME;        _contentHandler.startDocument();        parseXMLDeclaration(null);        parseNode();    /*    if (dbg.canWrite()) {      printDebugNode(dbg, doc, 0);      dbg.flush();    }    */    if (! _hasTopElement)      throw error(L.l("XML file has no top-element.  All well-formed XML files have a single top-level element."));    _contentHandler.endDocument();    QDocument owner = _owner;    _owner = null;          return owner;  }  /**   * The main dispatch loop.   *   * @param node the current node   * @param ch the next character   */  private void parseNode()    throws IOException, SAXException  {    char []valueBuffer = _valueBuffer;    int valueLength = valueBuffer.length;    int valueOffset = 0;    boolean isWhitespace = true;        char []inputBuffer = _inputBuffer;    int inputLength = _inputLength;    int inputOffset = _inputOffset;  loop:    while (true) {      int ch;      if (inputOffset < inputLength)	ch = inputBuffer[inputOffset++];      else if (fillBuffer()) {	inputBuffer = _inputBuffer;	inputOffset = _inputOffset;	inputLength = _inputLength;		ch = inputBuffer[inputOffset++];      }      else {	if (valueOffset > 0)	  addText(valueBuffer, 0, valueOffset, isWhitespace);		_inputOffset = inputOffset;	_inputLength = inputLength;	close();	return;      }      switch (ch) {      case '\n':	_line++;	valueBuffer[valueOffset++] = (char) ch;	break;	      case ' ': case '\t': case '\r':	valueBuffer[valueOffset++] = (char) ch;	break;      case 0xffff:	// marker for end of text for serialization (?)	
if (valueOffset > 0)	  addText(valueBuffer, 0, valueOffset, isWhitespace);		_inputOffset = inputOffset;	_inputLength = inputLength;	return;      case '&':	if (valueOffset > 0)	  addText(valueBuffer, 0, valueOffset, isWhitespace);		_inputOffset = inputOffset;	_inputLength = inputLength;	        parseEntityReference();		inputOffset = _inputOffset;	inputLength = _inputOffset;	break;      case '<':	if (valueOffset > 0)	  addText(valueBuffer, 0, valueOffset, isWhitespace);		_inputOffset = inputOffset;	_inputLength = inputLength;	ch = read();	if (ch == '/') {	  SaxIntern.Entry entry = parseName(0, false);	  ch = read();          if (ch != '>') {	    throw error(L.l("'</{0}>' expected '>' at {1}.  Closing tags must close immediately after the tag name.",			    entry.getName(), badChar(ch)));          }	  _namespace.pop(entry);	} 	// element: <tag attr=value ... attr=value> ...	else if (XmlChar.isNameStart(ch)) {	  parseElement(ch);	  ch = read();	}	// <! ...	else if (ch == '!') {	  // <![CDATA[ ... ]]>	  if ((ch = read()) == '[') {	    parseCdata();	    ch = read();	  }	  // <!-- ... -->          else if (ch == '-') {	    parseComment();	    ch = read();	  } 	  else if (XmlChar.isNameStart(ch)) {	    unread(ch);	    	    SaxIntern.Entry entry = parseName(0, false);	    String declName = entry.getName();	    if (declName.equals("DOCTYPE")) {	      parseDoctype();              if (_contentHandler instanceof DOMBuilder)                ((DOMBuilder) _contentHandler).dtd(_dtd);	    }	    else	      throw error(L.l("expected '<!DOCTYPE' declaration at {0}", declName));	  }	  else	    throw error(L.l("expected '<!DOCTYPE' declaration at {0}", badChar(ch)));	} 	// PI: <?tag attr=value ... attr=value?>	else if (ch == '?') {	  parsePI();	} 	else {	  throw error(L.l("expected tag name after '<' at {0}.  
Open tag names must immediately follow the open brace like '<foo ...>'", badChar(ch)));        }		inputOffset = _inputOffset;	inputLength = _inputLength;	break;	      default:	isWhitespace = false;	valueBuffer[valueOffset++] = (char) ch;	break;      }      if (valueOffset == valueLength) {	addText(valueBuffer, 0, valueOffset, isWhitespace);	valueOffset = 0;      }    }  }  /**   * Parses the &lt;!DOCTYPE> declaration.   */  private void parseDoctype()    throws IOException, SAXException  {    if (_activeNode != DOC_NAME)      throw error(L.l("<!DOCTYPE immediately follow the <?xml ...?> declaration."));        int ch = skipWhitespace(read());    ch = _reader.parseName(_nameBuffer, ch);    String name = _nameBuffer.toString();    ch = skipWhitespace(ch);    if (_dtd == null)      _dtd = new QDocumentType(name);    _dtd.setName(name);    if (XmlChar.isNameStart(ch)) {      ch = parseExternalID(ch);      ch = skipWhitespace(ch);      _dtd._publicId = _extPublicId;      _dtd._systemId = _extSystemId;    }    if (_dtd._systemId != null && ! 
_dtd._systemId.equals("")) {      InputStream is = null;      unread(ch);            XmlReader oldReader = _reader;      boolean hasInclude = false;      try {        pushInclude(_extPublicId, _extSystemId);        hasInclude = true;      } catch (Exception e) {	if (log.isLoggable(Level.FINEST))	  log.log(Level.FINER, e.toString(), e);	else	  log.finer(e.toString());      }      if (hasInclude) {        _stopOnIncludeEnd = true;	try {	  DtdParser dtdParser = new DtdParser(this, _dtd);	  ch = dtdParser.parseDoctypeDecl(_dtd);	} catch (XmlParseException e) {	  if (_extSystemId != null &&	      _extSystemId.startsWith("http")) {	    log.log(Level.FINE, e.toString(), e);	  }	  else	    throw e;	}        _stopOnIncludeEnd = false;        while (_reader != null && _reader != oldReader)          popInclude();      }      if (_reader != null)        ch = skipWhitespace(read());    }        if (ch == '[') {      DtdParser dtdParser = new DtdParser(this, _dtd);      ch = dtdParser.parseDoctypeDecl(_dtd);    }    ch = skipWhitespace(ch);    if (ch != '>')      throw error(L.l("expected '>' in <!DOCTYPE at {0}",                      badChar(ch)));  }  /**   * Parses an element.   
*
   * @param ch the current character
   */
  private void parseElement(int ch)
    throws IOException, SAXException
  {
    // Push the first name character back so parseName() sees the whole name.
    unread(ch);

    SaxIntern.Entry entry = parseName(0, false);
    _namespace.push(entry);

    ch = read();

    // Directly closed tags carry no attributes; otherwise parse them and
    // continue from the character that ended the attribute list.
    if (ch == '>' || ch == '/')
      _attributes.clear();
    else
      ch = parseAttributes(ch, true);

    QName qName = entry.getQName();

    // When validating, let the dtd's element definition supply defaults.
    if (_isValidating && _dtd != null) {
      QElementDef elementDef = _dtd.getElement(qName.getLocalPart());

      if (elementDef != null)
        elementDef.fillDefaults(_attributes);
    }

    String uri = entry.getUri();
    String localName = entry.getLocalName();
    String name = entry.getName();

    _contentHandler.startElement(uri, localName, name, _attributes);

    _hasTopElement = true;

    // open tag: <foo ...>
    if (ch == '>')
      return;

    if (ch != '/') {
      throw error(L.l("unexpected character {0} while parsing '{1}' attributes.  Expected an attribute name or '>' or '/>'.  XML element syntax is:\n  <name attr-1=\"value-1\" ... attr-n=\"value-n\">",
                      badChar(ch), entry.getName()));
    }

    // empty tag: <foo/> -- the '/' must be followed immediately by '>'
    ch = read();

    if (ch != '>') {
      throw error(L.l("unexpected character {0} after '/', expected '/>'",
                      badChar(ch), entry.getName()));
    }

    _contentHandler.endElement(entry.getUri(),
                               entry.getLocalName(),
                               entry.getName());

    _namespace.pop(entry);
  }

  /**
   * Parses the attributes in an element.
   *
   * @param ch the next character to reader.read.
   *
   * @return the next character to read.
   */
  private int parseAttributes(int ch, boolean isElement)
    throws IOException, SAXException
  {
    _attributes.clear();
    _attrNames.clear();
    _attrValues.clear();

    while (ch != -1) {
      boolean hasWhitespace = false;

      while (ch <= 0x20
             && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
        hasWhitespace = true;
        ch = read();
      }

      if (! XmlChar.isNameStart(ch)) {
        break;
      }

      if (! 
hasWhitespace)	throw error(L.l("attributes must be separated by whitespace"));      hasWhitespace = false;      unread(ch);      SaxIntern.Entry entry = parseName(0, true);      ch = read();      while (ch <= 0x20	     && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {	ch = read();      }      String value = null;      if (ch != '=') {	throw error(L.l("attribute '{0}' expects value at {1}.  XML requires attributes to have explicit values.",                        entry.getName(), badChar(ch)));      }      ch = read();            while (ch <= 0x20	     && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {	ch = read();      }      value = parseValue(ch);      ch = read();      if (entry.isXmlns()) {	String prefix;		if (entry.getPrefix() != null)	  prefix = entry.getLocalName();	else	  prefix = "";	  	String uri = value;	        if (_isXmlnsPrefix) {          _contentHandler.startPrefixMapping(prefix, uri);	}	// needed for xml/032e	if (isElement && _isXmlnsAttribute) {          _attributes.add(entry.getQName(), uri);	}      }      else {	_attrNames.add(entry);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?