inputstreamsource.java

来自「html 解析处理代码」· Java 代码 · 共 700 行 · 第 1/2 页
JAVA
700 行
// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML// http://sourceforge.org/projects/htmlparser// Copyright (C) 2004 Derrick Oswald//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v $// $Author: derrickoswald $// $Date: 2005/10/25 01:26:09 $// $Revision: 1.9 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.lexer;import java.io.ByteArrayInputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.ObjectInputStream;import java.io.ObjectOutputStream;import java.io.UnsupportedEncodingException;import org.htmlparser.util.EncodingChangeException;import org.htmlparser.util.ParserException;/** * A source of characters based on an InputStream such as from a URLConnection. */public class InputStreamSource    extends        Source{    /**     * An initial buffer size.     * Has a default value of {16384}.     */    public static int BUFFER_SIZE = 16384;    /**     * The stream of bytes.     * Set to <code>null</code> when the source is closed.     */    protected transient InputStream mStream;    /**     * The character set in use.     */    protected String mEncoding;    /**     * The converter from bytes to characters.     */    protected transient InputStreamReader mReader;    /**     * The characters read so far.     */    protected char[] mBuffer;    /**     * The number of valid bytes in the buffer.     */    protected int mLevel;    /**     * The offset of the next byte returned by read().     */    protected int mOffset;    /**     * The bookmark.     */    protected int mMark;    /**     * Create a source of characters using the default character set.     * @param stream The stream of bytes to use.     * @exception UnsupportedEncodingException If the default character set     * is unsupported.     */    public InputStreamSource (InputStream stream)        throws            UnsupportedEncodingException    {        this (stream, null, BUFFER_SIZE);    }    /**     * Create a source of characters.     * @param stream The stream of bytes to use.     * @param charset The character set used in encoding the stream.     * @exception UnsupportedEncodingException If the character set     * is unsupported.     */    public InputStreamSource (InputStream stream, String charset)        throws            UnsupportedEncodingException    {        this (stream, charset, BUFFER_SIZE);    }    /**     * Create a source of characters.     * @param stream The stream of bytes to use.     * @param charset The character set used in encoding the stream.     * @param size The initial character buffer size.     * @exception UnsupportedEncodingException If the character set     * is unsupported.     */    public InputStreamSource (InputStream stream, String charset, int size)        throws            UnsupportedEncodingException    {        if (null == stream)            stream = new Stream (null);        else            // bug #1044707 mark()/reset() issues            if (!stream.markSupported ())                // wrap the stream so we can reset                stream = new Stream (stream);            // else                // just because mark is supported doesn't guarantee                // proper reset operation; there is no call to mark                // in this code, so if reset misbehaves there is an                // appropriate message in setEncoding() to suggest                // wraping it in a Stream.                // This was deemed better than an attempt to call                // reset at this point just to check if we would                // succeed later, or to call mark with an arbitrary                // lookahead size        mStream = stream;        if (null == charset)        {            mReader = new InputStreamReader (stream);            mEncoding = mReader.getEncoding ();        }        else        {            mEncoding = charset;            mReader = new InputStreamReader (stream, charset);        }        mBuffer = new char[size];        mLevel = 0;        mOffset = 0;        mMark = -1;    }    //    // Serialization support    //    /**     * Serialization support.     * @param out Where to write this object.     * @exception IOException If serialization has a problem.     */    private void writeObject (ObjectOutputStream out)        throws            IOException    {        int offset;        char[] buffer;        if (null != mStream)        {            // remember the offset, drain the input stream, restore the offset            offset = mOffset;            buffer = new char[4096];            while (EOF != read (buffer))                ;            mOffset = offset;        }        out.defaultWriteObject ();    }    /**     * Deserialization support.     * @param in Where to read this object from.     * @exception IOException If deserialization has a problem.     */    private void readObject (ObjectInputStream in)        throws            IOException,            ClassNotFoundException    {        in.defaultReadObject ();        if (null != mBuffer) // buffer is null when destroy's been called            // pretend we're open, mStream goes null when exhausted            mStream = new ByteArrayInputStream (new byte[0]);    }    /**     * Get the input stream being used.     * @return The current input stream.     */    public InputStream getStream ()    {        return (mStream);    }    /**     * Get the encoding being used to convert characters.     * @return The current encoding.     */    public String getEncoding ()    {        return (mEncoding);    }    /**     * Begins reading from the source with the given character set.     * If the current encoding is the same as the requested encoding,     * this method is a no-op. Otherwise any subsequent characters read from     * this page will have been decoded using the given character set.<p>     * Some magic happens here to obtain this result if characters have already     * been consumed from this source.     * Since a Reader cannot be dynamically altered to use a different character     * set, the underlying stream is reset, a new Source is constructed     * and a comparison made of the characters read so far with the newly     * read characters up to the current position.     * If a difference is encountered, or some other problem occurs,     * an exception is thrown.     * @param character_set The character set to use to convert bytes into     * characters.     * @exception ParserException If a character mismatch occurs between     * characters already provided and those that would have been returned     * had the new character set been in effect from the beginning. An     * exception is also thrown if the underlying stream won't put up with     * these shenanigans.     */    public void setEncoding (String character_set)        throws            ParserException    {        String encoding;        InputStream stream;        char[] buffer;        int offset;        char[] new_chars;        encoding = getEncoding ();        if (!encoding.equalsIgnoreCase (character_set))        {            stream = getStream ();            try            {                buffer = mBuffer;                offset = mOffset;                stream.reset ();                try                {                    mEncoding = character_set;                    mReader = new InputStreamReader (stream, character_set);                    mBuffer = new char[mBuffer.length];                    mLevel = 0;                    mOffset = 0;                    mMark = -1;                    if (0 != offset)                    {                        new_chars = new char[offset];                        if (offset != read (new_chars))                            throw new ParserException ("reset stream failed");                        for (int i = 0; i < offset; i++)                            if (new_chars[i] != buffer[i])                                throw new EncodingChangeException ("character mismatch (new: "                                + new_chars[i]                                + " [0x"                                + Integer.toString (new_chars[i], 16)                                + "] != old: "                                + " [0x"                                + Integer.toString (buffer[i], 16)                                + buffer[i]                                + "]) for encoding change from "                                + encoding                                + " to "                                + character_set                                + " at character offset "                                + i);                    }                }                catch (IOException ioe)                {                    throw new ParserException (ioe.getMessage (), ioe);                }            }            catch (IOException ioe)            {   // bug #1044707 mark()/reset() issues                throw new ParserException ("Stream reset failed ("                    + ioe.getMessage ()                    + "), try wrapping it with a org.htmlparser.lexer.Stream",                    ioe);            }        }    }    /**     * Fetch more characters from the underlying reader.     * Has no effect if the underlying reader has been drained.     * @param min The minimum to read.     * @exception IOException If the underlying reader read() throws one.     */    protected void fill (int min)        throws            IOException    {        char[] buffer;        int size;        int read;        if (null != mReader) // mReader goes null when it's been sucked dry        {            size = mBuffer.length - mLevel; // available space            if (size < min) // oops, better get some buffer space            {                // unknown length... keep doubling                size = mBuffer.length * 2;                read = mLevel + min;                if (size < read) // or satisfy min, whichever is greater                    size = read;                else                    min = size - mLevel; // read the max                buffer = new char[size];            }            else            {                buffer = mBuffer;                min = size;            }            // read into the end of the 'new' buffer            read = mReader.read (buffer, mLevel, min);            if (EOF == read)            {                mReader.close ();                mReader = null;
inputstreamsource.java - 源码说明

本页面展示了「html 解析处理代码」中的 inputstreamsource.java 源码文件，采用 Java 编程语言编写，共 700 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与html相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?