xmlparser.java
来自「RESIN 3.2 最新源码」· Java 代码 · 共 1,909 行 · 第 1/3 页
JAVA
1,909 行
/* * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the * * Free Software Foundation, Inc. * 59 Temple Place, Suite 330 * Boston, MA 02111-1307 USA * * @author Scott Ferguson */package com.caucho.xml2;import com.caucho.util.CharBuffer;import com.caucho.vfs.*;import com.caucho.xml2.readers.MacroReader;import com.caucho.xml2.readers.Utf16Reader;import com.caucho.xml2.readers.Utf8Reader;import com.caucho.xml2.readers.XmlReader;import org.w3c.dom.Document;import org.w3c.dom.Node;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import javax.xml.namespace.QName;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.Arrays;import java.util.logging.Level;/** * A configurable XML parser. Loose versions of XML and HTML are supported * by changing the Policy object. * * <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml. */public class XmlParser extends AbstractParser { // Xerces uses the following public static final String XMLNS = "http://www.w3.org/2000/xmlns/"; public static final String XML = "http://www.w3.org/XML/1998/namespace"; static final QName DOC_NAME = new QName("#document"); static final QName TEXT_NAME = new QName("#text"); static final QName WHITESPACE_NAME = new QName("#whitespace"); private static final boolean []XML_NAME_CHAR; QAttributes _attributes; QAttributes _nullAttributes; CharBuffer _text; CharBuffer _eltName; CharBuffer _cb; CharBuffer _buf = new CharBuffer(); String _textFilename; int _textLine; TempCharBuffer _tempInputBuffer; char []_inputBuffer; int _inputOffset; int _inputLength; char []_textBuffer = new char[1024]; int _textLength; int _textCapacity = _textBuffer.length; boolean _isIgnorableWhitespace; char []_valueBuffer = _textBuffer; CharBuffer _name = new CharBuffer(); CharBuffer _nameBuffer = new CharBuffer(); MacroReader _macro = new MacroReader(); int _macroIndex = 0; int _macroLength = 0; char []_macroBuffer; int []_elementLines = new int[64]; int _elementTop; ArrayList<SaxIntern.Entry> _attrNames = new ArrayList<SaxIntern.Entry>(); ArrayList<String> _attrValues = new ArrayList<String>(); ReadStream _is; XmlReader _reader; String _extPublicId; String _extSystemId; NamespaceContextImpl _namespace = new NamespaceContextImpl(); SaxIntern _intern = new SaxIntern(_namespace);; QName _activeNode; QName _topNamespaceNode; boolean _isTagStart; boolean _stopOnIncludeEnd; boolean _hasTopElement; boolean _hasDoctype; Locator _locator = new LocatorImpl(this); public XmlParser() { } /** * Creates a new parser with a given parsing policy and dtd. * * @param policy the parsing policy, handling optional tags. * @param dtd the parser's dtd. */ XmlParser(QDocumentType dtd) { super(dtd); } /** * Initialize the parser. */ void init() { super.init(); _attributes = new QAttributes(); _nullAttributes = new QAttributes(); _eltName = new CharBuffer(); _text = new CharBuffer(); _textLength = 0; _isIgnorableWhitespace = true; _elementTop = 0; _elementLines[0] = 1; _line = 1; _dtd = null; _isTagStart = false; _stopOnIncludeEnd = false; _extPublicId = null; _extSystemId = null; _filename = null; _publicId = null; _systemId = null; _hasTopElement = false; _hasDoctype = false; _macroIndex = 0; _macroLength = 0; _reader = null; // _owner = null; } /** * Parse the document from a read stream. * * @param is read stream to parse from. * * @return The parsed document. */ Document parseInt(ReadStream is) throws IOException, SAXException { _tempInputBuffer = TempCharBuffer.allocate(); _inputBuffer = _tempInputBuffer.getBuffer(); _inputLength = _inputOffset = 0; _is = is; if (_filename == null && _systemId != null) _filename = _systemId; else if (_filename == null) _filename = _is.getUserPath(); if (_systemId == null) { _systemId = _is.getPath().getURL(); if ("null:".equals(_systemId) || "string:".equals(_systemId)) _systemId = "stream"; } if (_filename == null) _filename = _systemId; if (_filename == null) _filename = "stream"; if (_dtd != null) _dtd.setSystemId(_systemId); if (_builder != null) { if (! "string:".equals(_systemId) && ! "stream".equals(_systemId)) _builder.setSystemId(_systemId); _builder.setFilename(_is.getPath().getURL()); } if (_contentHandler == null) _contentHandler = new org.xml.sax.helpers.DefaultHandler(); _contentHandler.setDocumentLocator(_locator); if (_owner == null) _owner = new QDocument(); if (_defaultEncoding != null) _owner.setAttribute("encoding", _defaultEncoding); _owner.addDepend(is.getPath()); _activeNode = DOC_NAME; _contentHandler.startDocument(); parseXMLDeclaration(null); parseNode(); /* if (dbg.canWrite()) { printDebugNode(dbg, doc, 0); dbg.flush(); } */ if (! _hasTopElement) throw error(L.l("XML file has no top-element. All well-formed XML files have a single top-level element.")); _contentHandler.endDocument(); QDocument owner = _owner; _owner = null; return owner; } /** * The main dispatch loop. * * @param node the current node * @param ch the next character */ private void parseNode() throws IOException, SAXException { char []valueBuffer = _valueBuffer; int valueLength = valueBuffer.length; int valueOffset = 0; boolean isWhitespace = true; char []inputBuffer = _inputBuffer; int inputLength = _inputLength; int inputOffset = _inputOffset; loop: while (true) { int ch; if (inputOffset < inputLength) ch = inputBuffer[inputOffset++]; else if (fillBuffer()) { inputBuffer = _inputBuffer; inputOffset = _inputOffset; inputLength = _inputLength; ch = inputBuffer[inputOffset++]; } else { if (valueOffset > 0) addText(valueBuffer, 0, valueOffset, isWhitespace); _inputOffset = inputOffset; _inputLength = inputLength; close(); return; } switch (ch) { case '\n': _line++; valueBuffer[valueOffset++] = (char) ch; break; case ' ': case '\t': case '\r': valueBuffer[valueOffset++] = (char) ch; break; case 0xffff: // marker for end of text for serialization (?) if (valueOffset > 0) addText(valueBuffer, 0, valueOffset, isWhitespace); _inputOffset = inputOffset; _inputLength = inputLength; return; case '&': if (valueOffset > 0) addText(valueBuffer, 0, valueOffset, isWhitespace); _inputOffset = inputOffset; _inputLength = inputLength; parseEntityReference(); inputOffset = _inputOffset; inputLength = _inputOffset; break; case '<': if (valueOffset > 0) addText(valueBuffer, 0, valueOffset, isWhitespace); _inputOffset = inputOffset; _inputLength = inputLength; ch = read(); if (ch == '/') { SaxIntern.Entry entry = parseName(0, false); ch = read(); if (ch != '>') { throw error(L.l("'</{0}>' expected '>' at {1}. Closing tags must close immediately after the tag name.", entry.getName(), badChar(ch))); } _namespace.pop(entry); } // element: <tag attr=value ... attr=value> ... else if (XmlChar.isNameStart(ch)) { parseElement(ch); ch = read(); } // <! ... else if (ch == '!') { // <![CDATA[ ... ]]> if ((ch = read()) == '[') { parseCdata(); ch = read(); } // <!-- ... --> else if (ch == '-') { parseComment(); ch = read(); } else if (XmlChar.isNameStart(ch)) { unread(ch); SaxIntern.Entry entry = parseName(0, false); String declName = entry.getName(); if (declName.equals("DOCTYPE")) { parseDoctype(); if (_contentHandler instanceof DOMBuilder) ((DOMBuilder) _contentHandler).dtd(_dtd); } else throw error(L.l("expected '<!DOCTYPE' declaration at {0}", declName)); } else throw error(L.l("expected '<!DOCTYPE' declaration at {0}", badChar(ch))); } // PI: <?tag attr=value ... attr=value?> else if (ch == '?') { parsePI(); } else { throw error(L.l("expected tag name after '<' at {0}. Open tag names must immediately follow the open brace like '<foo ...>'", badChar(ch))); } inputOffset = _inputOffset; inputLength = _inputLength; break; default: isWhitespace = false; valueBuffer[valueOffset++] = (char) ch; break; } if (valueOffset == valueLength) { addText(valueBuffer, 0, valueOffset, isWhitespace); valueOffset = 0; } } } /** * Parses the <!DOCTYPE> declaration. */ private void parseDoctype() throws IOException, SAXException { if (_activeNode != DOC_NAME) throw error(L.l("<!DOCTYPE immediately follow the <?xml ...?> declaration.")); int ch = skipWhitespace(read()); ch = _reader.parseName(_nameBuffer, ch); String name = _nameBuffer.toString(); ch = skipWhitespace(ch); if (_dtd == null) _dtd = new QDocumentType(name); _dtd.setName(name); if (XmlChar.isNameStart(ch)) { ch = parseExternalID(ch); ch = skipWhitespace(ch); _dtd._publicId = _extPublicId; _dtd._systemId = _extSystemId; } if (_dtd._systemId != null && ! _dtd._systemId.equals("")) { InputStream is = null; unread(ch); XmlReader oldReader = _reader; boolean hasInclude = false; try { pushInclude(_extPublicId, _extSystemId); hasInclude = true; } catch (Exception e) { if (log.isLoggable(Level.FINEST)) log.log(Level.FINER, e.toString(), e); else log.finer(e.toString()); } if (hasInclude) { _stopOnIncludeEnd = true; try { DtdParser dtdParser = new DtdParser(this, _dtd); ch = dtdParser.parseDoctypeDecl(_dtd); } catch (XmlParseException e) { if (_extSystemId != null && _extSystemId.startsWith("http")) { log.log(Level.FINE, e.toString(), e); } else throw e; } _stopOnIncludeEnd = false; while (_reader != null && _reader != oldReader) popInclude(); } if (_reader != null) ch = skipWhitespace(read()); } if (ch == '[') { DtdParser dtdParser = new DtdParser(this, _dtd); ch = dtdParser.parseDoctypeDecl(_dtd); } ch = skipWhitespace(ch); if (ch != '>') throw error(L.l("expected '>' in <!DOCTYPE at {0}", badChar(ch))); } /** * Parses an element. * * @param ch the current character */ private void parseElement(int ch) throws IOException, SAXException { unread(ch); SaxIntern.Entry entry = parseName(0, false); _namespace.push(entry); ch = read(); if (ch != '>' && ch != '/') { ch = parseAttributes(ch, true); } else _attributes.clear(); QName qName = entry.getQName(); if (_isValidating && _dtd != null) { QElementDef elementDef = _dtd.getElement(qName.getLocalPart()); if (elementDef != null) elementDef.fillDefaults(_attributes); } _contentHandler.startElement(entry.getUri(), entry.getLocalName(), entry.getName(), _attributes); _hasTopElement = true; if (ch == '/') { // empty tag: <foo/> if ((ch = read()) == '>') { _contentHandler.endElement(entry.getUri(), entry.getLocalName(), entry.getName()); _namespace.pop(entry); } // short tag: </foo/some text here/> else { throw error(L.l("unexpected character {0} after '/', expected '/>'", badChar(ch), entry.getName())); } } else if (ch != '>') { throw error(L.l("unexpected character {0} while parsing '{1}' attributes. Expected an attribute name or '>' or '/>'. XML element syntax is:\n <name attr-1=\"value-1\" ... attr-n=\"value-n\">", badChar(ch), entry.getName())); } } /** * Parses the attributes in an element. * * @param ch the next character to reader.read. * * @return the next character to read. */ private int parseAttributes(int ch, boolean isElement) throws IOException, SAXException { _attributes.clear(); _attrNames.clear(); _attrValues.clear(); while (ch != -1) { boolean hasWhitespace = false; while (ch <= 0x20 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) { hasWhitespace = true; ch = read(); } if (! XmlChar.isNameStart(ch)) { break; } if (! hasWhitespace) throw error(L.l("attributes must be separated by whitespace")); hasWhitespace = false; unread(ch); SaxIntern.Entry entry = parseName(0, true); ch = read(); while (ch <= 0x20 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) { ch = read(); } String value = null; if (ch != '=') { throw error(L.l("attribute '{0}' expects value at {1}. XML requires attributes to have explicit values.", entry.getName(), badChar(ch))); } ch = read(); while (ch <= 0x20 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) { ch = read(); } value = parseValue(ch); ch = read(); if (entry.isXmlns()) { String prefix; if (entry.getPrefix() != null) prefix = entry.getLocalName(); else prefix = ""; String uri = value; if (_isXmlnsPrefix) { _contentHandler.startPrefixMapping(prefix, uri); } // needed for xml/032e if (isElement && _isXmlnsAttribute) { _attributes.add(entry.getQName(), uri); } } else { _attrNames.add(entry);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?