xmlparser.java
来自「RESIN 3.2 最新源码」· Java 代码 · 共 2,651 行 · 第 1/5 页
JAVA
2,651 行
/* * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the * Free SoftwareFoundation, Inc. * 59 Temple Place, Suite 330 * Boston, MA 02111-1307 USA * * @author Scott Ferguson */package com.caucho.xml;import com.caucho.util.CharBuffer;import com.caucho.vfs.Path;import com.caucho.vfs.ReadStream;import com.caucho.vfs.ReaderWriterStream;import com.caucho.vfs.Vfs;import com.caucho.vfs.WriteStream;import com.caucho.xml.readers.MacroReader;import com.caucho.xml.readers.Utf16Reader;import com.caucho.xml.readers.Utf8Reader;import com.caucho.xml.readers.XmlReader;import org.w3c.dom.Document;import org.w3c.dom.Node;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.Arrays;import java.util.logging.Level;/** * A configurable XML parser. Loose versions of XML and HTML are supported * by changing the Policy object. * * <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml. 
 */
public class XmlParser extends AbstractParser {
  // Xerces uses the following
  public static final String XMLNS = "http://www.w3.org/2000/xmlns/";
  public static final String XML = "http://www.w3.org/XML/1998/namespace";

  // Synthetic names; DOC_NAME is assigned to _activeNode when a parse starts
  // and is what parseDoctype() compares _activeNode against.
  static final QName DOC_NAME = new QName(null, "#document", null);
  static final QName TEXT_NAME = new QName(null, "#text", null);
  static final QName JSP_NAME = new QName(null, "#jsp", null);
  static final QName WHITESPACE_NAME = new QName(null, "#whitespace", null);
  static final QName JSP_ATTRIBUTE_NAME = new QName("xtp", "jsp-attribute", null);

  // Both attribute holders are re-created by init().
  QAttributes _attributes;
  QAttributes _nullAttributes;

  // Set to true by parseDoctype(); reset to false by init().
  boolean _inDtd;

  CharBuffer _text;
  CharBuffer _eltName;
  CharBuffer _cb;
  CharBuffer _buf = new CharBuffer();

  // Filename and line captured by parseNode() whenever _textLength is 0,
  // i.e. where the next run of text begins.
  String _textFilename;
  int _textLine;

  // Pending character data. parseNode() inspects _textBuffer[_textLength - 1]
  // to collapse whitespace. NOTE(review): presumably filled by addText() and
  // flushed by appendText() -- neither is visible here; confirm.
  char []_textBuffer = new char[1024];
  int _textLength;
  int _textCapacity = _textBuffer.length;
  boolean _isIgnorableWhitespace;

  // True only while parseNode() copies a <% ... %> block that does not start
  // with "<%=".
  boolean _isJspText;

  // Scratch buffers handed to _reader.parseName().
  CharBuffer _name = new CharBuffer();
  CharBuffer _nameBuffer = new CharBuffer();

  MacroReader _macro = new MacroReader();
  int _macroIndex = 0;
  int _macroLength = 0;
  char []_macroBuffer;

  // NOTE(review): presumably parallel element stacks indexed by _elementTop;
  // the push/pop code is outside this view -- confirm.
  QName []_elementNames = new QName[64];
  NamespaceMap []_namespaces = new NamespaceMap[64];
  int []_elementLines = new int[64];
  int _elementTop;

  // Passed to the policy in parseInt(); deliberately not reset by init()
  // (see the commented-out assignment there).
  NamespaceMap _namespaceMap;

  ArrayList<String> _attrNames = new ArrayList<String>();
  ArrayList<String> _attrValues = new ArrayList<String>();

  // Stream given to parseInt().
  ReadStream _is;
  // Current character source; parseNode() reads every character through it.
  XmlReader _reader;

  // External ID copied into the DTD by parseDoctype() after parseExternalID().
  String _extPublicId;
  String _extSystemId;

  QName _activeNode;
  QName _topNamespaceNode;

  boolean _isTagStart;
  // While true, parseNode() does not pop to an enclosing reader at end of input.
  boolean _stopOnIncludeEnd;
  // Checked by parseInt() in strict mode after the document has been parsed.
  boolean _hasTopElement;
  boolean _hasDoctype;
  // Set by init(): true when the policy is an HtmlPolicy.
  boolean _isHtml;

  // Locator registered with the content handler in parseInt().
  Locator _locator = new LocatorImpl(this);

  /**
   * Creates a parser and calls clear().
   */
  public XmlParser()
  {
    clear();
  }

  /**
   * Creates a new parser with a given parsing policy and dtd.
   *
   * @param policy the parsing policy, handling optional tags.
   * @param dtd the parser's dtd.
   */
  XmlParser(Policy policy, QDocumentType dtd)
  {
    super(policy, dtd);

    clear();
  }

  /**
   * Initialize the parser.
   *
   * <p>Calls super.init(), allocates fresh attribute and text buffers, resets
   * the per-parse flags, counters and identifiers to their starting values,
   * and finally calls _policy.init().
   */
  void init()
  {
    super.init();

    _attributes = new QAttributes();
    _nullAttributes = new QAttributes();
    _eltName = new CharBuffer();
    _text = new CharBuffer();

    _isHtml = _policy instanceof HtmlPolicy;

    // NOTE(review): this source was recovered from a copy whose line breaks
    // were lost; each commented-out statement below is assumed to end at its
    // semicolon -- confirm against the upstream file.

    // jsp/193b
    // _namespaceMap = null;

    _textLength = 0;
    _isIgnorableWhitespace = true;
    _elementTop = 0;
    _elementLines[0] = 1;

    _line = 1;

    _dtd = null;
    _inDtd = false;
    _isTagStart = false;
    _stopOnIncludeEnd = false;

    _extPublicId = null;
    _extSystemId = null;

    // _filename = null;
    _publicId = null;
    _systemId = null;

    _hasTopElement = false;
    _hasDoctype = false;

    _macroIndex = 0;
    _macroLength = 0;

    _reader = null;

    // _owner = null;

    _policy.init();
  }

  /**
   * Parse the document from a read stream.
   *
   * <p>Fills in _filename and _systemId when they are unset, configures the
   * policy, DTD and builder, installs a default content handler if none was
   * supplied, then parses the XML declaration followed by the document body.
   *
   * @param is read stream to parse from.
   *
   * @return The parsed document.
   *
   * @throws IOException propagated from reading the stream
   * @throws SAXException propagated from the content handler, or raised in
   *   strict mode when no top-level element was seen
   */
  Document parseInt(ReadStream is)
    throws IOException, SAXException
  {
    _is = is;

    // Filename preference: explicit filename, then system id, then the
    // stream's user path.
    if (_filename == null && _systemId != null)
      _filename = _systemId;
    else if (_filename == null)
      _filename = _is.getUserPath();

    if (_systemId == null) {
      _systemId = _is.getPath().getURL();
      // The "null:" and "string:" URLs are reported as the generic "stream".
      if ("null:".equals(_systemId) || "string:".equals(_systemId))
        _systemId = "stream";
    }

    /* xsl/0401
    if (_isNamespaceAware)
      _namespaceMap = new NamespaceMap(null, "", "");
    */
    _policy.setNamespaceAware(_isNamespaceAware);

    // Last-resort fallbacks in case getUserPath() returned null.
    if (_filename == null)
      _filename = _systemId;

    if (_filename == null)
      _filename = "stream";

    if (_dtd != null)
      _dtd.setSystemId(_systemId);

    if (_builder != null) {
      // The placeholder ids are not forwarded to the builder as a system id.
      if (! "string:".equals(_systemId) && ! "stream".equals(_systemId))
        _builder.setSystemId(_systemId);
      _builder.setFilename(_is.getPath().getURL());
    }

    if (_contentHandler == null)
      _contentHandler = new org.xml.sax.helpers.DefaultHandler();

    _contentHandler.setDocumentLocator(_locator);

    if (_owner == null)
      _owner = new QDocument();
    if (_defaultEncoding != null)
      _owner.setAttribute("encoding", _defaultEncoding);
    _owner.addDepend(is.getPath());

    _activeNode = DOC_NAME;

    _policy.setStream(is);
    _policy.setNamespace(_namespaceMap);

    _contentHandler.startDocument();

    int ch = parseXMLDeclaration(null);

    ch = skipWhitespace(ch);
    parseNode(ch, false);

    /*
    if (dbg.canWrite()) {
      printDebugNode(dbg, doc, 0);
      dbg.flush();
    }
    */

    if (_strictXml && ! _hasTopElement)
      throw error(L.l("XML file has no top-element. All well-formed XML files have a single top-level element."));

    // NOTE(review): _contentHandler was defaulted above, so this guard only
    // matters if the handler is cleared while parsing -- confirm.
    if (_contentHandler != null)
      _contentHandler.endDocument();

    // Hand the document to the caller and drop the parser's own reference.
    QDocument owner = _owner;
    _owner = null;

    return owner;
  }

  /**
   * The main dispatch loop.
   *
   * <p>Reads characters one at a time and dispatches on them: whitespace and
   * ordinary characters go into the pending text, '&amp;' starts an entity
   * reference, and '&lt;' starts a close tag, element, CDATA section, comment,
   * DOCTYPE, processing instruction or JSP block. The loop ends at end of
   * input, at the 0xffff marker, or at the "/&gt;" that closes a short-form tag.
   *
   * <p>NOTE(review): earlier documentation listed a "node" parameter; the
   * method takes no such argument.
   *
   * @param ch the next character
   * @param special true for the short form, <foo/bar/>
   */
  private void parseNode(int ch, boolean special)
    throws IOException, SAXException
  {
    //boolean isTop = node instanceof QDocument;

    _text.clear();

    // NOTE(review): no statement in this method refers to the label "loop";
    // the single "continue" below is unlabeled.
  loop:
    while (true) {
      // No text pending: remember where the next run of text starts.
      if (_textLength == 0) {
        _textFilename = getFilename();
        _textLine = getLine();
      }

      switch (ch) {
      case -1:
        // End of the current reader: flush any pending text first.
        if (_textLength != 0)
          appendText();

        // Unless told to stop at the end of an include, return to the next
        // reader (presumably the enclosing one -- getNext() is not visible
        // here) and keep parsing from it.
        if (! _stopOnIncludeEnd && _reader.getNext() != null) {
          popInclude();
          if (_reader != null)
            parseNode(_reader.read(), special);
          return;
        }

        closeTag("");
        return;

      case ' ': case '\t': case '\n': case '\r':
        if (! _normalizeWhitespace)
          addText((char) ch);
        else if (_textLength == 0) {
          // Leading whitespace is dropped when _isTagStart is set, otherwise
          // it becomes a single space.
          if (! _isTagStart)
            addText(' ');
        }
        else if (_textBuffer[_textLength - 1] != ' ') {
          // A run of whitespace collapses to one space.
          addText(' ');
        }
        ch = _reader.read();
        break;

      case 0xffff:
        // marker for end of text for serialization
        return;

      default:
        addText((char) ch);
        ch = _reader.read();
        break;

      case '/':
        // Outside the short form, '/' is ordinary text.
        if (! special) {
          addText((char) ch);
          ch = _reader.read();
          continue;
        }
        ch = _reader.read();
        // "/>" (or end of input) terminates the short-form node.
        if (ch == '>' || ch == -1) {
          appendText();
          popNode();
          return;
        }
        // Otherwise keep the '/' as text; the character read after it is
        // dispatched by the next loop iteration.
        addText('/');
        break;

      case '&':
        ch = parseEntityReference();
        break;

      case '<':
        // NOTE(review): endTag is never read within this method.
        boolean endTag = false;
        ch = _reader.read();

        // close tag: </name>
        if (ch == '/' && ! special) {
          // When normalizing, drop the trailing space before the close tag.
          if (_normalizeWhitespace &&
              _textLength > 0 && _textBuffer[_textLength - 1] == ' ') {
            _textLength--;
          }
          appendText();

          ch = _reader.parseName(_name, _reader.read());

          if (ch != '>') {
            // XXX: Hack for Java PetStore
            while (XmlChar.isWhitespace(ch))
              ch = _reader.read();

            if (ch != '>')
              throw error(L.l("`</{0}>' expected `>' at {1}. Closing tags must close immediately after the tag name.", _name, badChar(ch)));
          }

          closeTag(_policy.getName(_name).getName());
          ch = _reader.read();
        }
        // element: <tag attr=value ... attr=value> ...
        else if (XmlChar.isNameStart(ch)) {
          appendText();

          parseElement(ch);
          ch = _reader.read();
        }
        // <! ...
        else if (ch == '!') {
          // <![CDATA[ ... ]]>
          if ((ch = _reader.read()) == '[') {
            parseCdata();
            ch = _reader.read();
          }
          // <!-- ... -->
          else if (ch == '-') {
            parseComment();
            ch = _reader.read();
          }
          else if (XmlChar.isNameStart(ch)) {
            appendText();
            ch = _reader.parseName(_name, ch);
            String declName = _name.toString();
            if (declName.equals("DOCTYPE")) {
              parseDoctype(ch);
              if (_contentHandler instanceof DOMBuilder)
                ((DOMBuilder) _contentHandler).dtd(_dtd);
              ch = _reader.read();
            }
            // Forgiving mode also accepts the keyword in any letter case.
            else if (_forgiving && declName.equalsIgnoreCase("doctype")) {
              parseDoctype(ch);
              if (_contentHandler instanceof DOMBuilder)
                ((DOMBuilder) _contentHandler).dtd(_dtd);
              ch = _reader.read();
            }
            else
              throw error(L.l("expected `<!DOCTYPE' declaration at {0}", declName));
          }
          else if (_forgiving) {
            // Keep "<!" as literal text; the character after it is dispatched
            // by the next loop iteration.
            addText("<!");
          }
          else
            throw error(L.l("expected `<!DOCTYPE' declaration at {0}", badChar(ch)));
        }
        // PI: <?tag attr=value ... attr=value?>
        else if (ch == '?') {
          ch = parsePI();
        }
        else if (_strictXml) {
          throw error(L.l("expected tag name after `<' at {0}. Open tag names must immediately follow the open brace like `<foo ...>'", badChar(ch)));
        }
        // implicit <![CDATA[ for <% ... %>
        else if (_isJsp && ch == '%') {
          ch = _reader.read();

          appendText();
          // "<%=" is not flagged as JSP text; every other "<%" block is.
          _isJspText = ch != '=';

          addText("<%");

          // Copy the block verbatim up to and including the closing "%>".
          while (ch >= 0) {
            if (ch == '%') {
              ch = _reader.read();
              if (ch == '>') {
                addText("%>");
                ch = _reader.read();
                break;
              }
              else
                addText('%');
            }
            else {
              addText((char) ch);
              ch = _reader.read();
            }
          }

          appendText();
          _isJspText = false;
        }
        else {
          // A '<' that starts nothing recognizable is kept as text; the
          // character after it is dispatched by the next loop iteration.
          addText('<');
        }
      }
    }
  }

  /**
   * Parses the <!DOCTYPE> declaration.
   */
  private void parseDoctype(int ch)
    throws IOException, SAXException
  {
    if (_activeNode != DOC_NAME)
      throw error(L.l("<!DOCTYPE immediately follow the <?xml ...?> declaration."));

    _inDtd = true;

    ch = skipWhitespace(ch);
    ch = _reader.parseName(_nameBuffer, ch);
    String name = _nameBuffer.toString();
    ch = skipWhitespace(ch);

    if (_dtd == null)
      _dtd = new QDocumentType(name);

    _dtd.setName(name);

    if (XmlChar.isNameStart(ch)) {
      ch = parseExternalID(ch);
      ch = skipWhitespace(ch);

      _dtd._publicId = _extPublicId;
      _dtd._systemId = _extSystemId;
    }

    if (_dtd._systemId != null && ! _dtd._systemId.equals("")) {
      InputStream is = null;

      unread(ch);

      XmlReader oldReader = _reader;
      boolean hasInclude = false;

      try {
        pushInclude(_extPublicId, _extSystemId);
        hasInclude = true;
      } catch (Exception e) {
        if (log.isLoggable(Level.FINEST))
          log.log(Level.FINER, e.toString(), e);
        else
          log.finer(e.toString());
      }

      if (hasInclude) {
        _stopOnIncludeEnd = true;
        try {
          ch = parseDoctypeDecl(_dtd);
        } catch (XmlParseException e) {
          if (_extSystemId != null &&
              _extSystemId.startsWith("http")) {
            log.log(Level.FINE, e.toString(), e);
          }
          else
            throw e;
        }
        _stopOnIncludeEnd = false;

        while (_reader != null && _reader != oldReader)
          popInclude();
      }

      if (_reader != null)
        ch = skipWhitespace(read());
    }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?