📄 inputstreamsource.java
字号:
// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML// http://sourceforge.org/projects/htmlparser// Copyright (C) 2004 Derrick Oswald//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v $// $Author: derrickoswald $// $Date: 2005/10/25 01:26:09 $// $Revision: 1.9 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.lexer;import java.io.ByteArrayInputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.ObjectInputStream;import java.io.ObjectOutputStream;import java.io.UnsupportedEncodingException;import org.htmlparser.util.EncodingChangeException;import org.htmlparser.util.ParserException;/** * A source of characters based on an InputStream such as from a URLConnection. */public class InputStreamSource extends Source{ /** * An initial buffer size. * Has a default value of {16384}. */ public static int BUFFER_SIZE = 16384; /** * The stream of bytes. * Set to <code>null</code> when the source is closed. */ protected transient InputStream mStream; /** * The character set in use. */ protected String mEncoding; /** * The converter from bytes to characters. */ protected transient InputStreamReader mReader; /** * The characters read so far. */ protected char[] mBuffer; /** * The number of valid bytes in the buffer. */ protected int mLevel; /** * The offset of the next byte returned by read(). */ protected int mOffset; /** * The bookmark. */ protected int mMark; /** * Create a source of characters using the default character set. * @param stream The stream of bytes to use. * @exception UnsupportedEncodingException If the default character set * is unsupported. */ public InputStreamSource (InputStream stream) throws UnsupportedEncodingException { this (stream, null, BUFFER_SIZE); } /** * Create a source of characters. * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. * @exception UnsupportedEncodingException If the character set * is unsupported. */ public InputStreamSource (InputStream stream, String charset) throws UnsupportedEncodingException { this (stream, charset, BUFFER_SIZE); } /** * Create a source of characters. * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. * @param size The initial character buffer size. * @exception UnsupportedEncodingException If the character set * is unsupported. */ public InputStreamSource (InputStream stream, String charset, int size) throws UnsupportedEncodingException { if (null == stream) stream = new Stream (null); else // bug #1044707 mark()/reset() issues if (!stream.markSupported ()) // wrap the stream so we can reset stream = new Stream (stream); // else // just because mark is supported doesn't guarantee // proper reset operation; there is no call to mark // in this code, so if reset misbehaves there is an // appropriate message in setEncoding() to suggest // wraping it in a Stream. // This was deemed better than an attempt to call // reset at this point just to check if we would // succeed later, or to call mark with an arbitrary // lookahead size mStream = stream; if (null == charset) { mReader = new InputStreamReader (stream); mEncoding = mReader.getEncoding (); } else { mEncoding = charset; mReader = new InputStreamReader (stream, charset); } mBuffer = new char[size]; mLevel = 0; mOffset = 0; mMark = -1; } // // Serialization support // /** * Serialization support. * @param out Where to write this object. * @exception IOException If serialization has a problem. */ private void writeObject (ObjectOutputStream out) throws IOException { int offset; char[] buffer; if (null != mStream) { // remember the offset, drain the input stream, restore the offset offset = mOffset; buffer = new char[4096]; while (EOF != read (buffer)) ; mOffset = offset; } out.defaultWriteObject (); } /** * Deserialization support. * @param in Where to read this object from. * @exception IOException If deserialization has a problem. */ private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject (); if (null != mBuffer) // buffer is null when destroy's been called // pretend we're open, mStream goes null when exhausted mStream = new ByteArrayInputStream (new byte[0]); } /** * Get the input stream being used. * @return The current input stream. */ public InputStream getStream () { return (mStream); } /** * Get the encoding being used to convert characters. * @return The current encoding. */ public String getEncoding () { return (mEncoding); } /** * Begins reading from the source with the given character set. * If the current encoding is the same as the requested encoding, * this method is a no-op. Otherwise any subsequent characters read from * this page will have been decoded using the given character set.<p> * Some magic happens here to obtain this result if characters have already * been consumed from this source. * Since a Reader cannot be dynamically altered to use a different character * set, the underlying stream is reset, a new Source is constructed * and a comparison made of the characters read so far with the newly * read characters up to the current position. * If a difference is encountered, or some other problem occurs, * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. * @exception ParserException If a character mismatch occurs between * characters already provided and those that would have been returned * had the new character set been in effect from the beginning. An * exception is also thrown if the underlying stream won't put up with * these shenanigans. */ public void setEncoding (String character_set) throws ParserException { String encoding; InputStream stream; char[] buffer; int offset; char[] new_chars; encoding = getEncoding (); if (!encoding.equalsIgnoreCase (character_set)) { stream = getStream (); try { buffer = mBuffer; offset = mOffset; stream.reset (); try { mEncoding = character_set; mReader = new InputStreamReader (stream, character_set); mBuffer = new char[mBuffer.length]; mLevel = 0; mOffset = 0; mMark = -1; if (0 != offset) { new_chars = new char[offset]; if (offset != read (new_chars)) throw new ParserException ("reset stream failed"); for (int i = 0; i < offset; i++) if (new_chars[i] != buffer[i]) throw new EncodingChangeException ("character mismatch (new: " + new_chars[i] + " [0x" + Integer.toString (new_chars[i], 16) + "] != old: " + " [0x" + Integer.toString (buffer[i], 16) + buffer[i] + "]) for encoding change from " + encoding + " to " + character_set + " at character offset " + i); } } catch (IOException ioe) { throw new ParserException (ioe.getMessage (), ioe); } } catch (IOException ioe) { // bug #1044707 mark()/reset() issues throw new ParserException ("Stream reset failed (" + ioe.getMessage () + "), try wrapping it with a org.htmlparser.lexer.Stream", ioe); } } } /** * Fetch more characters from the underlying reader. * Has no effect if the underlying reader has been drained. * @param min The minimum to read. * @exception IOException If the underlying reader read() throws one. */ protected void fill (int min) throws IOException { char[] buffer; int size; int read; if (null != mReader) // mReader goes null when it's been sucked dry { size = mBuffer.length - mLevel; // available space if (size < min) // oops, better get some buffer space { // unknown length... keep doubling size = mBuffer.length * 2; read = mLevel + min; if (size < read) // or satisfy min, whichever is greater size = read; else min = size - mLevel; // read the max buffer = new char[size]; } else { buffer = mBuffer; min = size; } // read into the end of the 'new' buffer read = mReader.read (buffer, mLevel, min); if (EOF == read) { mReader.close (); mReader = null;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -