📄 minml.java
字号:
// Copyright (c) 2000, 2001 The Wilson Partnership.// All Rights Reserved.// @(#)MinML.java, 1.7, 18th November 2001// Author: John Wilson - tug@wilson.co.ukpackage uk.co.wilson.xml;/*Copyright (c) 2000, 2001 John Wilson (tug@wilson.co.uk).All rights reserved.Redistribution and use in source and binary forms,with or without modification, are permitted providedthat the following conditions are met:Redistributions of source code must retain the abovecopyright notice, this list of conditions and thefollowing disclaimer.Redistributions in binary form must reproduce theabove copyright notice, this list of conditions andthe following disclaimer in the documentation and/orother materials provided with the distribution.All advertising materials mentioning features or useof this software must display the following acknowledgement:This product includes software developed by John Wilson.The name of John Wilson may not be used to endorse or promoteproducts derived from this software without specific priorwritten permission.THIS SOFTWARE IS PROVIDED BY JOHN WILSON ``AS IS'' AND ANYEXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR APARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL JOHN WILSONBE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITEDTO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED ANDON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICTLIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISINGIN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISEDOF THE POSSIBILITY OF SUCH DAMAGE*/import java.io.IOException;import java.io.InputStreamReader;import java.io.Reader;import java.io.Writer;import java.net.URL;import java.util.EmptyStackException;import java.util.Locale;import java.util.Stack;import java.util.Vector;import org.xml.sax.AttributeList;import org.xml.sax.DTDHandler;import org.xml.sax.EntityResolver;import org.xml.sax.ErrorHandler;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import uk.org.xml.sax.DocumentHandler;import uk.org.xml.sax.Parser;public class MinML implements Parser, Locator, DocumentHandler, ErrorHandler { public static final int endStartName = 0; public static final int emitStartElement = 1; public static final int emitEndElement = 2; public static final int possiblyEmitCharacters = 3; public static final int emitCharacters = 4; public static final int emitCharactersSave = 5; public static final int saveAttributeName = 6; public static final int saveAttributeValue = 7; public static final int startComment = 8; public static final int endComment = 9; public static final int incLevel = 10; public static final int decLevel = 11; public static final int startCDATA = 12; public static final int endCDATA = 13; public static final int processCharRef = 14; public static final int writeCdata = 15; public static final int exitParser = 16; public static final int parseError = 17; public static final int discardAndChange = 18; public static final int discardSaveAndChange = 19; public 
static final int saveAndChange = 20; public static final int change = 21; /* The constants up to this point (endStartName .. change) are the action codes used as case labels by the switch on (transition & 0XFF) in parse(). The constants that follow (inSkipping .. inDTD) name parser states; parse() takes its initial state from operands[inSkipping]. NOTE(review): presumably the other in* constants index the operands table the same way -- confirm against the table definition. */ public static final int inSkipping = 0; public static final int inSTag = 1; public static final int inPossiblyAttribute = 2; public static final int inNextAttribute = 3; public static final int inAttribute = 4; public static final int inAttribute1 = 5; public static final int inAttributeValue = 6; public static final int inAttributeQuoteValue = 7; public static final int inAttributeQuotesValue = 8; public static final int inETag = 9; public static final int inETag1 = 10; public static final int inMTTag = 11; public static final int inTag = 12; public static final int inTag1 = 13; public static final int inPI = 14; public static final int inPI1 = 15; public static final int inPossiblySkipping = 16; public static final int inCharData = 17; public static final int inCDATA = 18; public static final int inCDATA1 = 19; public static final int inComment =20; public static final int inDTD = 21; /** Creates a parser and stores both arguments in the instance fields of the same names. NOTE(review): how these two values are consumed is not visible in this part of the file -- presumably they size the internal character buffer; confirm. @param initialBufferSize value stored in this.initialBufferSize @param bufferIncrement value stored in this.bufferIncrement */ public MinML(final int initialBufferSize, final int bufferIncrement) { this.initialBufferSize = initialBufferSize; this.bufferIncrement = bufferIncrement; } /** Creates a parser with the default settings: delegates to MinML(int, int) with initialBufferSize = 256 and bufferIncrement = 128. */ public MinML() { this(256, 128); } public void parse(final Reader in) throws SAXException, IOException { final Vector attributeNames = new Vector(); final Vector attributeValues = new Vector(); /* AttributeList view over the two vectors above: names and values are looked up by position (or, for getValue(String), by the position of the name in attributeNames), and every attribute reports the type "CDATA". */ final AttributeList attrs = new AttributeList() { public int getLength() { return attributeNames.size(); } public String getName(final int i) { return (String)attributeNames.elementAt(i); } public String getType(final int i) { return "CDATA"; } public String getValue(final int i) { return (String)attributeValues.elementAt(i); } public String getType(final String name) { return "CDATA"; } public String getValue(final String name) { final int index = attributeNames.indexOf(name); return (index == -1) ? 
null : (String)attributeValues.elementAt(index); } }; final MinMLBuffer buffer = new MinMLBuffer(in); int currentChar = 0, charCount = 0; int level = 0; int mixedContentLevel = -1; String elementName = null; String state = operands[inSkipping]; this.lineNumber = 1; this.columnNumber = 0; try { while(true) { charCount++; // // this is to try and make the loop a bit faster // currentChar = buffer.read(); is simpler but is a bit slower. // currentChar = (buffer.nextIn == buffer.lastIn) ? buffer.read() : buffer.chars[buffer.nextIn++]; final int transition; if (currentChar > ']') { transition = state.charAt(14); } else { final int charClass = charClasses[currentChar + 1]; if (charClass == -1) fatalError("Document contains illegal control character with value " + currentChar, this.lineNumber, this.columnNumber); if (charClass == 12) { if (currentChar == '\r') { currentChar = '\n'; charCount = -1; } if (currentChar == '\n') { if (charCount == 0) continue; // preceeded by '\r' so ignore if (charCount != -1) charCount = 0; this.lineNumber++; this.columnNumber = 0; } } transition = state.charAt(charClass); } this.columnNumber++; final String operand = operands[transition >>> 8]; switch (transition & 0XFF) { case endStartName: // end of start element name elementName = buffer.getString(); if (currentChar != '>' && currentChar != '/') break; // change state to operand // drop through to emit start element (we have no attributes) case emitStartElement: // emit start element final Writer newWriter = this.extDocumentHandler.startElement(elementName, attrs, (this.tags.empty()) ? 
this.extDocumentHandler.startDocument(buffer) : buffer.getWriter()); buffer.pushWriter(newWriter); this.tags.push(elementName); attributeValues.removeAllElements(); attributeNames.removeAllElements(); if (mixedContentLevel != -1) mixedContentLevel++; if (currentChar != '/') break; // change state to operand // <element/> drop through case emitEndElement: // emit end element try { final String begin = (String)this.tags.pop(); buffer.popWriter(); elementName = buffer.getString(); if (currentChar != '/' && !elementName.equals(begin)) { fatalError("end tag </" + elementName + "> does not match begin tag <" + begin + ">", this.lineNumber, this.columnNumber); } else { this.documentHandler.endElement(begin); if (this.tags.empty()) { this.documentHandler.endDocument(); return; } } } catch (final EmptyStackException e) { fatalError("end tag at begining of document", this.lineNumber, this.columnNumber); } if (mixedContentLevel != -1) --mixedContentLevel; break; // change state to operand case emitCharacters: // emit characters buffer.flush(); break; // change state to operand case emitCharactersSave: // emit characters and save current character if (mixedContentLevel == -1) mixedContentLevel = 0; buffer.flush(); buffer.saveChar((char)currentChar); break; // change state to operand case possiblyEmitCharacters: // write any skipped whitespace if in mixed content if (mixedContentLevel != -1) buffer.flush(); break; // change state to operand case saveAttributeName: // save attribute name attributeNames.addElement(buffer.getString()); break; // change state to operand case saveAttributeValue: // save attribute value attributeValues.addElement(buffer.getString()); break; // change state to operand case startComment: // change state if we have found "<!--" if (buffer.read() != '-') continue; // not "<!--" break; // change state to operand case endComment: // change state if we find "-->" if ((currentChar = buffer.read()) == '-') { // deal with the case where we might have 
"------->" while ((currentChar = buffer.read()) == '-'); if (currentChar == '>') break; // end of comment, change state to operand } continue; // not end of comment, don't change state case incLevel: level++; break; case decLevel: if (level == 0) break; // outer level <> change state level--; continue; // in nested <>, don't change state case startCDATA: // change state if we have found "<![CDATA[" if (buffer.read() != 'C') continue; // don't change state if (buffer.read() != 'D') continue; // don't change state if (buffer.read() != 'A') continue; // don't change state if (buffer.read() != 'T') continue; // don't change state if (buffer.read() != 'A') continue; // don't change state if (buffer.read() != '[') continue; // don't change state break; // change state to operand case endCDATA: // change state if we find "]]>" if ((currentChar = buffer.read()) == ']') { // deal with the case where we might have "]]]]]]]>" while ((currentChar = buffer.read()) == ']') buffer.write(']'); if (currentChar == '>') break; // end of CDATA section, change state to operand buffer.write(']'); } buffer.write(']'); buffer.write(currentChar); continue; // not end of CDATA section, don't change state case processCharRef: // process character entity int crefState = 0; currentChar = buffer.read(); while (true) { if ("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState) == currentChar) { crefState++; if (currentChar == ';') { buffer.write("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState)); break; } else if (currentChar == '#') { final int radix; currentChar = buffer.read(); if (currentChar == 'x') { radix = 16; currentChar = buffer.read(); } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -