📄 httptool.java
字号:
package net.matuschek.http;
/*************************************************
Copyright (c) 2001/2002 by Daniel Matuschek
*************************************************/
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.URL;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
import java.util.Properties;
import net.matuschek.http.connection.HttpConnection;
import net.matuschek.http.connection.HttpsHelper;
import net.matuschek.http.cookie.Cookie;
import net.matuschek.http.cookie.CookieException;
import net.matuschek.http.cookie.CookieManager;
import net.matuschek.http.cookie.MemoryCookieManager;
import net.matuschek.util.Base64;
import net.matuschek.util.ByteBuffer;
import net.matuschek.util.ChunkedInputStream;
import net.matuschek.util.LimitedBandwidthStream;
import org.apache.log4j.Category;
/**
* Class for retrieving documents from HTTP servers.
*
* <p>The main purpose of this class is to retrieve a document
* from an HTTP server. </p>
*
* <p>For many purposes the Java URLInputStream is good for this,
* but if you want to have full control over the HTTP headers
* (both request and response headers), HttpTool is the answer. </p>
*
* <p>Also it defines a callback interface to inform a client about
* the state of the current download operation. </p>
*
* <p>It is possible to abort a download after getting the
* HTTP response headers from the server (e.g. if a document of
* this Content-Type is useless for your application, or the document
* is too big, or for any other reason you like). </p>
*
* <p>HttpTool is reusable. You should initialize it once and use
* it for every download operation.</p>
*
* @author Daniel Matuschek
* @version $Id: HttpTool.java,v 1.28 2004/03/26 20:28:44 matuschd Exp $
*/
public class HttpTool {
/** ASCII code of the carriage return character. */
static final byte CR = 13;

/** ASCII code of the line feed character. */
static final byte LF = 10;

/** HTTP version string used by this class. */
static final String HTTP_VERSION = "HTTP/1.1";

/* Status constants */

/** The HTTP connection is about to be established. */
public static final int STATUS_CONNECTING = 0;

/** The HTTP connection was established, but no data were retrieved yet. */
public static final int STATUS_CONNECTED = 1;

/** Data is being retrieved. */
public static final int STATUS_RETRIEVING = 2;

/** The download finished. */
public static final int STATUS_DONE = 3;

/** The download was not finished because a DownloadRule denied it. */
public static final int STATUS_DENIEDBYRULE = 4;

/** Default port for HTTP. */
private static final int DEFAULT_HTTPPORT = 80;

/** Default port for HTTPS. */
private static final int DEFAULT_HTTPSPORT = 443;

/** Default value of the agent name. */
private static final String AGENTNAME =
    "JoBo/1.4beta "
    + "(http://www.matuschek.net/jobo.html)";

/**
 * Default number of bytes after which the callback interface
 * is called.
 */
private static final int DEFAULT_UPDATEINTERVAL = 1024;

/** Default socket timeout in seconds. */
private static final int DEFAULT_SOCKETTIMEOUT = 20;

/** Value of the HTTP User-Agent header. */
private String agentName = AGENTNAME;

/** Value of the HTTP Referer header (null until set). */
private String referer;

/** Value of the HTTP From header (null until set). */
private String fromAddress;

/** Date of the HTTP If-Modified-Since header (null until set). */
private Date modifyDate;

/**
 * Maximal used bandwidth in bytes per second;
 * 0 disables bandwidth limitations.
 */
private int bandwidth;

/** Address of the proxy (null until a proxy is set). */
private InetAddress proxyAddr;

/** Port number of the proxy. */
private int proxyPort;

/** Textual description of the proxy (format host:port). */
private String proxyDescr = "";

/** Timeout for getting data, in seconds. */
private int socketTimeout = DEFAULT_SOCKETTIMEOUT;

/** Whether HttpTool should accept and use cookies. */
private boolean cookiesEnabled = true;

/** Log4J Category object for logging; assigned in the constructor. */
private Category log;

/** Authentication infos. */
private Properties userInfos = new Properties();

/** @link dependency */
/*#HttpDoc lnkHttpDoc;*/

/**
 * Number of bytes read from the web server after which the
 * callback interface is called (by default one kilobyte).
 */
private int updateInterval = DEFAULT_UPDATEINTERVAL;

/**
 * Callback object that is informed about the state of the current
 * retrieve operation after n bytes were read from the web server.
 */
private HttpToolCallback callback;

/**
 * The DownloadRuleSet tells the HttpTool whether it should download
 * the whole file after getting the headers.
 */
private DownloadRuleSet downloadRules;

/** The cookie manager used to store cookies. */
private CookieManager cookieManager;

/**
 * DateFormat instance used to format If-Modified-Since requests;
 * assigned once in the static initializer.
 */
static SimpleDateFormat df;

/**
 * NTLM authorization settings; retrieveDocument clones this object
 * when it handles an NTLM/Negotiate authentication challenge.
 */
private NTLMAuthorization ntlmAuthorization;
/*
 * Initialize df to a formatter for timezone "GMT" and locale Locale.US.
 * If-Modified-Since requests need to be in that format.
 *
 * The time zone is set on the formatter itself instead of temporarily
 * replacing the JVM-wide default time zone: swapping the default is
 * visible to every other thread while this initializer runs and may be
 * rejected with a SecurityException in restricted environments.
 *
 * NOTE(review): SimpleDateFormat is not synchronized; callers sharing df
 * across threads must synchronize externally.
 */
static {
    df = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
    df.setTimeZone(TimeZone.getTimeZone("GMT"));
}
/**
* Initializes HttpTool with a new CookieManager (that will not contain
* any cookie).
* Enables logging
*/
public HttpTool() {
this.cookieManager = new MemoryCookieManager();
log = Category.getInstance(getClass().getName());
}
/**
 * Defines the value used for the HTTP Referer header.
 *
 * @param referer value for the Referer header
 */
public void setReferer(String referer) {
    this.referer = referer;
}
/**
 * Defines the value used for the HTTP User-Agent header.
 *
 * @param name name of the user agent (may contain spaces)
 */
public void setAgentName(String name) {
    agentName = name;
}
/**
 * Returns the value currently used for the HTTP User-Agent header.
 *
 * @return the User-Agent name
 */
public String getAgentName() {
    return this.agentName;
}
/**
 * <b>Insiders BugFix</b>
 * Calls {@code finish()} on the cookie manager, if one is set.
 * Does nothing when no cookie manager is configured.
 */
public void finish() {
    if (cookieManager == null) {
        return;
    }
    cookieManager.finish();
}
/**
 * Sets the download rules for this object. <br />
 * A download rule uses the HTTP response headers to decide whether the
 * download should be finished.
 *
 * @param rules the DownloadRuleSet to use
 */
public void setDownloadRuleSet(DownloadRuleSet rules) {
    downloadRules = rules;
}
/**
 * Returns the download rules of this object.
 *
 * @return the current DownloadRuleSet (null if none was set)
 */
public DownloadRuleSet getDownloadRuleSet() {
    return downloadRules;
}
/**
 * Returns the timeout for getting data, in seconds.
 *
 * @return the value of socketTimeout
 * @see #setTimeout(int)
 */
public int getTimeout() {
    return socketTimeout;
}
/**
 * Sets the timeout for getting data. If HttpTool can't read
 * data from a remote web server within this number of seconds,
 * it will stop the download of the current file.
 *
 * @param timeout timeout in seconds
 * @see #getTimeout()
 */
public void setTimeout(int timeout) {
    socketTimeout = timeout;
}
/**
 * Switches cookie handling on or off.
 *
 * @param enable if true, HTTP cookies will be enabled; if false,
 *               HttpTool will not use cookies
 */
public void setEnableCookies(boolean enable) {
    cookiesEnabled = enable;
}
/**
 * Reports whether cookie handling is switched on.
 *
 * @return true if HTTP cookies are enabled, false otherwise
 */
public boolean getEnableCookies() {
    return cookiesEnabled;
}
/**
 * Sets the proxy to use.
 *
 * <p>A {@code null} or empty description clears the proxy settings.
 * Otherwise the description must have the format {@code host:port};
 * the host name is resolved immediately.</p>
 *
 * <p>The proxy address, port and description are always changed
 * together: if the description is rejected, all three are left in the
 * cleared state, so {@link #getProxy()} never reports a proxy whose
 * address was not stored.</p>
 *
 * @param proxyDescr the proxy definition in the format host:port,
 *                   or null / "" to clear the proxy settings
 * @throws HttpException if the definition has no host:port form, the
 *                       port is not a number in the range 0..65535, or
 *                       the host cannot be resolved
 */
public void setProxy(String proxyDescr)
throws HttpException
{
    // Reset everything first so that a failed call cannot leave address,
    // port and description out of sync with each other.
    this.proxyAddr = null;
    this.proxyPort = 0;
    this.proxyDescr = "";

    if ((proxyDescr != null) && (! proxyDescr.equals(""))) {
        int pos = proxyDescr.indexOf(":");
        if (pos <= 0) {
            throw new HttpException("Proxy definition incorrect, "+
                                    "format must be host:port: "+
                                    proxyDescr);
        }

        String host = proxyDescr.substring(0, pos);

        int port;
        try {
            port = Integer.parseInt(proxyDescr.substring(pos+1));
        } catch (NumberFormatException e) {
            throw new HttpException("Proxy definition incorrect, "+
                                    "port not numeric: "+
                                    proxyDescr);
        }
        // Ports outside this range are rejected by the socket classes
        // later on; report them here as a configuration error instead.
        if (port < 0 || port > 65535) {
            throw new HttpException("Proxy definition incorrect, "+
                                    "port out of range: "+
                                    proxyDescr);
        }

        InetAddress addr;
        try {
            addr = InetAddress.getByName(host);
        } catch (UnknownHostException e) {
            throw new HttpException("Host not found: "+host);
        }

        // Everything validated: commit address and port together.
        this.proxyAddr = addr;
        this.proxyPort = port;
    }
    this.proxyDescr = proxyDescr;
}
/**
 * Returns the textual proxy description stored by
 * {@link #setProxy(String)}.
 *
 * @return the proxy settings in the format host:port
 */
public String getProxy() {
    return this.proxyDescr;
}
/**
 * Sets the date for the "If-Modified-Since" header.
 * Usually this is null and HttpTool will retrieve every
 * document. Setting it to a date will retrieve only
 * documents that were modified since that time.
 *
 * @param modifyDate the date to use, or null to disable the header
 */
public void setIfModifiedSince(Date modifyDate) {
    this.modifyDate = modifyDate;
}
/**
 * Returns the date used for the "If-Modified-Since" header.
 *
 * @return a Date object if the "If-Modified-Since" date is set,
 *         null otherwise
 */
public Date getIfModifiedSince() {
    return modifyDate;
}
/**
 * Defines the value used for the HTTP From header.
 *
 * @param fromAddress an email address (e.g. some@where.com)
 */
public void setFromAddress(String fromAddress) {
    this.fromAddress = fromAddress;
}
/**
 * Returns the callback object currently in use.
 *
 * @return the defined HttpToolCallback object (null if none was set)
 */
public HttpToolCallback getCallback() {
    return this.callback;
}
/**
 * Returns the bandwidth setting.
 *
 * @return maximal used bandwidth in bytes per second;
 *         0 means bandwidth limitation is disabled
 */
public int getBandwidth() {
    return this.bandwidth;
}
/**
 * Changes the bandwidth setting.
 *
 * @param bandwidth maximal used bandwidth in bytes per second;
 *                  0 disables bandwidth limitation
 */
public void setBandwidth(int bandwidth) {
    this.bandwidth = bandwidth;
}
/**
 * Registers a callback object.
 *
 * If one is set, it will be used to report the current status of
 * the download: HttpTool calls methods of this object while it is
 * retrieving a document.
 *
 * @param callback a callback object
 * @see HttpToolCallback
 */
public void setCallback(HttpToolCallback callback) {
    this.callback = callback;
}
/**
 * Returns the callback update interval.
 *
 * @return the update interval in bytes
 * @see #setUpdateInterval(int)
 */
public int getUpdateInterval() {
    return this.updateInterval;
}
/**
 * Sets the callback update interval.
 *
 * This setting is used if a callback object is defined: after
 * reading this number of bytes, the callback method
 * <code>setHttpToolDocCurrentSize</code> will be called.
 * You should not set this to a value smaller than 1000 unless your
 * bandwidth is very small, because it will slow down downloads.
 *
 * @param updateInterval update interval in bytes, must be positive
 * @throws IllegalArgumentException if updateInterval is zero or negative
 *
 * @see HttpToolCallback
 */
public void setUpdateInterval(int updateInterval) {
    // Reject non-positive values up front; the field stays unchanged.
    if (updateInterval <= 0) {
        throw new IllegalArgumentException("updateInterval must be > 0 (was "+
                                           updateInterval+")");
    }
    this.updateInterval = updateInterval;
}
/**
 * Replaces the CookieManager of this HttpTool.
 * The constructor installs a MemoryCookieManager; use this method
 * to plug in your own CookieManager implementation.
 *
 * @param cm an object that implements the CookieManager interface
 */
public void setCookieManager(CookieManager cm) {
    cookieManager = cm;
}
/**
 * Returns the CookieManager of this HttpTool.
 *
 * @return the CookieManager that will be used by this HttpTool
 */
public CookieManager getCookieManager() {
    return cookieManager;
}
/**
 * Deletes all cookies by calling {@code clear()} on the cookie
 * manager. Does nothing when no cookie manager is configured.
 */
public void clearCookies() {
    if (cookieManager == null) {
        return;
    }
    cookieManager.clear();
}
/**
* Retrieves a document from the given URL.
* If Cookies are enabled it will use the CookieManager to set Cookies
* it got from former retrieveDocument operations.
*
* @param u the URL to retrieve (only http:// supported yet)
* @param method HttpConstants.GET for a GET request, HttpConstants.POST
* for a POST request
* @param parameters additional parameters. Will be added to the URL if
* this is a GET request, posted if it is a POST request
* @return a HttpDoc if a document was retrieved, null otherwise
*
* @see HttpConstants
*/
public HttpDoc retrieveDocument(URL u, int method, String parameters) throws HttpException {
DocAndConnection docAndConnection = retrieveDocumentInternal(u, method, parameters, null, null);
HttpDoc doc = docAndConnection != null ? docAndConnection.httpDoc : null;
if (doc != null && doc.getHttpCode() == 401) {
String authProtName = NTLMAuthorization.WWW_AUTHENTICATE_HEADER;
String authProtValue = doc.getHeaderValue(authProtName);
if (authProtValue == null) {
authProtName = NTLMAuthorization.PROXY_AUTHENTICATE_HEADER;
authProtValue = doc.getHeaderValue(authProtName);
}
if (authProtValue.indexOf(NTLMAuthorization.NTLM_TAG)>=0 ||
authProtValue.indexOf("Negotiate")>=0) {
try {
// STEP 1 - send NTLM-Request
NTLMAuthorization authorization = (NTLMAuthorization) ntlmAuthorization.clone();
authorization.setHost(u.getHost());
// log.info("NTLM-Authentication: " + authorization);
String auth = authorization.getRequest();
docAndConnection = retrieveDocumentInternal(u, method, parameters, null, auth);
// STEP 2 - receive NTLM-Nonce
doc = docAndConnection.httpDoc;
authProtValue = doc.getHeaderValue(authProtName);
authorization.extractNonce(authProtValue);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -