standardurlreader.java

来自「spam source codejasen-0.9jASEN - java An」· Java 代码 · 共 123 行

JAVA
123
字号
/*
 * @(#)StandardURLReader.java	 11/12/2004
 *
 * Copyright (c) 2004, 2005  jASEN.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the distribution.
 *
 *   3. The names of the authors may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *   4. Any modification or additions to the software must be contributed back
 *      to the project.
 *
 *   5. Any investigation or reverse engineering of source code or binary to
 *      enable emails to bypass the filters, and hence inflict spam and or viruses
 *      onto users who use or do not use jASEN could subject the perpetrator to
 *      criminal and or civil liability.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
 * OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
package org.jasen.net;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;

import org.jasen.error.JasenException;
import org.jasen.interfaces.URLReader;
import org.jasen.io.NonBlockingStreamReader;


/**
 * <P>
 * 	Extracts the content from a remote web server for the purposes of analysis.
 * </P>
 * <P>
 * 	The whole response body is read into memory through a
 * 	{@link NonBlockingStreamReader} and returned as a String. The bytes are decoded
 * 	with the charset declared in the response's Content-Type header when one is
 * 	declared and supported by this JVM; otherwise the platform default charset is used.
 * </P>
 * @author Jason Polites
 */
public class StandardURLReader implements URLReader {

	/** Parameter name that introduces the charset inside a Content-Type value. */
	private static final String CHARSET_KEY = "charset=";

	private int readBufferSize = 2048;
	private long readTimeout = 5000L; // 5 seconds

	/*
	 * (non-Javadoc)
	 * @see org.jasen.interfaces.URLReader#readURL(java.net.URL)
	 */
	/**
	 * Downloads the content behind the given url and returns it as text.
	 *
	 * @param url The location to read from; must not be null.
	 * @return The downloaded content decoded to a String.
	 * @throws JasenException If an IOException occurs while connecting to or reading from the url.
	 */
	public String readURL(URL url) throws JasenException {

		ByteArrayOutputStream out = new ByteArrayOutputStream();
		InputStream in = null;

		try {
			// Equivalent to url.openStream(), but keeps the connection so the
			// declared Content-Type can be inspected once the body has been read.
			URLConnection connection = url.openConnection();
			in = connection.getInputStream();

			// The last argument is passed as null, exactly as before; its meaning
			// is defined by NonBlockingStreamReader.
			NonBlockingStreamReader reader = new NonBlockingStreamReader();
			reader.read(in, out, readBufferSize, readTimeout, null);

			return decode(out.toByteArray(), extractCharset(connection.getContentType()));
		}
		catch (IOException e) {
			throw new JasenException(e);
		}
		finally {
			if(in != null) {
				try {
					in.close();
				}
				catch (IOException ignore) {
					// Best effort only: a failure to close must not mask the result
					// or the original exception.
				}
			}
		}
	}

	/**
	 * Converts the raw bytes to text using the named charset, falling back to the
	 * platform default charset when no name is given or the name is not supported.
	 *
	 * @param data The bytes to decode.
	 * @param charsetName The charset name to try first; may be null.
	 * @return The decoded text.
	 */
	private static String decode(byte[] data, String charsetName) {
		if(charsetName != null) {
			try {
				return new String(data, charsetName);
			}
			catch (UnsupportedEncodingException unsupported) {
				// Unknown or malformed charset name sent by the server;
				// fall through to the platform default below.
			}
		}

		return new String(data);
	}

	/**
	 * Pulls the value of the charset parameter out of a Content-Type header value,
	 * for example "ISO-8859-1" from "text/html; charset=ISO-8859-1".
	 *
	 * @param contentType The Content-Type header value; may be null.
	 * @return The charset name with surrounding whitespace and double quotes removed,
	 *         or null if the header is null or carries no charset value.
	 */
	private static String extractCharset(String contentType) {
		if(contentType == null) {
			return null;
		}

		int keyLength = CHARSET_KEY.length();

		for (int i = 0; i + keyLength <= contentType.length(); i++) {
			// Case-insensitive match without depending on the default locale.
			if(contentType.regionMatches(true, i, CHARSET_KEY, 0, keyLength)) {
				int start = i + keyLength;
				int end = contentType.indexOf(';', start);

				if(end < 0) {
					end = contentType.length();
				}

				String name = contentType.substring(start, end).trim();

				// The value may be sent as a quoted string.
				if(name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
					name = name.substring(1, name.length() - 1).trim();
				}

				return name.length() == 0 ? null : name;
			}
		}

		return null;
	}


	/**
	 * @return Returns the size (in bytes) of the buffer used when reading url data.
	 */
	public int getReadBufferSize() {
		return readBufferSize;
	}
	/**
	 * @param readBufferSize The size (in bytes) of the buffer used when reading url data.
	 */
	public void setReadBufferSize(int readBufferSize) {
		this.readBufferSize = readBufferSize;
	}
	/**
	 * @return Returns the time (in milliseconds) to wait for data from the url stream until reading is abnormally aborted.
	 */
	public long getReadTimeout() {
		return readTimeout;
	}
	/**
	 * @param readTimeout The time (in milliseconds) to wait for data from the url stream until reading is abnormally aborted.
	 */
	public void setReadTimeout(long readTimeout) {
		this.readTimeout = readTimeout;
	}
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?