📄 urlreader.java
字号:
package cn.yicha.subject.spider.fecther;
import java.io.*;
import java.net.*;
public class URLReader
{
    /** Charset name used whenever a response is decoded as UTF-8. */
    private static final String UTF_CHARSET = "utf-8";

    /**
     * Content-Type of the last response fetched by
     * {@link #readUrlContentByProxy(String, String, String, String, boolean)}.
     * The field is static, so concurrent fetches overwrite each other's value.
     */
    private static String _content_type = "";

    /**
     * Fetches the body of the given URL with a fixed Nokia 7650 User-Agent.
     *
     * @param url     the URL to read, e.g. http://www.1233.net
     * @param withUTF true to decode the body as UTF-8, false to use the platform default charset
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUrlContent(String url, boolean withUTF) throws IOException
    {
        System.getProperties().put("proxySet", "false");
        StringBuffer result = new StringBuffer();
        BufferedReader in = null;
        try {
            URL requestedUrl = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) requestedUrl.openConnection();
            conn.setRequestProperty("User-Agent", "Nokia7650/1.0 symbianOS/6.1 series60/0.9 Profile/MIDP-1.0 Configuration/CLDC-1.0");
            in = openReader(conn, withUTF);
            appendLines(in, result);
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
        return result.toString();
    }

    /**
     * Sends an HTTP request to the given URL and discards the response.
     * This is best-effort: an I/O failure is printed to stderr and not rethrown.
     *
     * @param url the URL to request, e.g. http://www.1233.net
     * @throws IOException declared for compatibility; I/O failures are reported on stderr
     */
    public static void sendUrlRequest(String url) throws IOException
    {
        System.getProperties().put("proxySet", "false");
        InputStream response = null;
        try {
            URL requestedUrl = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) requestedUrl.openConnection();
            // Asking for the stream is what actually performs the request.
            response = conn.getInputStream();
        }
        catch (IOException ex) {
            ex.printStackTrace();
        }
        finally {
            // The stream used to be dropped without being closed; release it here.
            if (response != null) {
                try {
                    response.close();
                }
                catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Fetches the body of the given URL, always decoding it as UTF-8.
     *
     * @param url the URL to read, e.g. http://www.1233.net
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUTFUrlContent(String url) throws IOException {
        return readUrlContent(url, true);
    }

    /**
     * Fetches the body of the given URL with the default user agent and the
     * full header set of {@link #readUrlContentByProxy(String, String, String, String, boolean)}.
     * Empty strings are passed as proxy host and port.
     *
     * @param url the URL to read, e.g. http://www.1233.net
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUrlContent(String url) throws IOException {
        return readUrlContentByProxy(url, "", "", false);
    }

    /**
     * Fetches the body of the given URL using the supplied User-Agent.
     * Empty strings are passed as proxy host and port.
     *
     * @param url       the URL to read, e.g. http://www.1233.net
     * @param userAgent value of the User-Agent request header
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUrlContent(String url, String userAgent) throws IOException {
        return readUrlContentByProxy(url, "", "", userAgent);
    }

    /**
     * Requests the given URL and returns the name=value part of the first
     * Set-Cookie response header (everything before the first ';').
     * Failures while requesting are printed to stderr and yield null.
     *
     * @param url the URL to request, e.g. http://www.1233.net
     * @return the cookie pair, or null when no Set-Cookie header was received or the request failed
     * @throws IOException if closing the response stream fails
     */
    public static String readUrlSessionID(String url) throws IOException {
        BufferedReader in = null;
        String sessionId = null;
        try {
            URL requestedUrl = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) requestedUrl.openConnection();
            in = openReader(conn, false);
            // The body is consumed but not returned; only the cookie matters here.
            while (in.readLine() != null) {
                // discard
            }
            String cookieValue = conn.getHeaderField("Set-Cookie");
            if (cookieValue != null) {
                // A cookie without attributes has no ';'. Use the whole value then,
                // instead of failing on substring(0, -1) and returning null.
                int attributesStart = cookieValue.indexOf(';');
                sessionId = attributesStart >= 0
                        ? cookieValue.substring(0, attributesStart)
                        : cookieValue;
                System.out.println("session id --> " + sessionId);
            }
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
        return sessionId;
    }

    /**
     * Fetches the body of the given URL, sending {@code sessionId} as the Cookie
     * request header. Failures are printed to stderr; whatever was read before
     * the failure is still returned.
     *
     * @param url       the URL to read, e.g. http://www.1233.net
     * @param sessionId value of the Cookie request header
     * @return the (possibly partial or empty) body, with every line terminated by "\n"
     * @throws IOException if closing the response stream fails
     */
    public static String getUrlContentBySessionID(String url, String sessionId) throws IOException {
        StringBuffer result = new StringBuffer();
        BufferedReader in = null;
        try {
            URL requestedUrl = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) requestedUrl.openConnection();
            conn.setRequestProperty("Cookie", sessionId);
            System.out.println("sending session id --> " + sessionId);
            in = openReader(conn, false);
            appendLines(in, result);
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
        return result.toString();
    }

    /**
     * Fetches the body of the given URL through a proxy, using a default
     * Nokia 6108 User-Agent.
     *
     * @param url       the URL to read, e.g. http://www.1233.net
     * @param proxyHost proxy host name or address
     * @param proxyPort proxy port
     * @param bForceUTF true to decode as UTF-8 regardless of the response Content-Type
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUrlContentByProxy(String url, String proxyHost, String proxyPort, boolean bForceUTF) throws IOException
    {
        final String _USER_AGENT = "Nokia6108/1.0 (05.04) Profile/MIDP-1.0 Configuration/CLDC-1.0";
        return readUrlContentByProxy(url, proxyHost, proxyPort, _USER_AGENT, bForceUTF);
    }

    /**
     * Fetches the body of the given URL through a proxy with the supplied
     * User-Agent, without forcing UTF-8 decoding.
     *
     * @param url       the URL to read, e.g. http://www.1233.net
     * @param proxyHost proxy host name or address
     * @param proxyPort proxy port
     * @param userAgent value of the User-Agent request header
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUrlContentByProxy(String url, String proxyHost, String proxyPort, String userAgent) throws IOException
    {
        return readUrlContentByProxy(url, proxyHost, proxyPort, userAgent, false);
    }

    /**
     * Fetches the body of the given URL through a proxy.
     *
     * The proxy is configured through the JVM-wide system properties
     * proxySet/proxyHost/proxyPort, so it affects every connection in the
     * process, not just this call. The Content-Type of the response is
     * recorded and can be read back with {@link #get_content_type()}.
     *
     * @param url       the URL to read, e.g. http://www.1233.net
     * @param proxyHost proxy host name or address; the proxy properties are written only when
     *                  both this and proxyPort are non-null
     * @param proxyPort proxy port
     * @param userAgent value of the User-Agent request header
     * @param bForceUtf true to decode as UTF-8 regardless of the response Content-Type;
     *                  otherwise UTF-8 is used only when the Content-Type mentions it and
     *                  the platform default charset is used in all other cases
     * @return the body, with every line terminated by "\n"
     * @throws IOException if the URL is malformed or the request/read fails
     */
    public static String readUrlContentByProxy(String url, String proxyHost, String proxyPort, String userAgent, boolean bForceUtf) throws IOException
    {
        if (proxyHost != null && proxyPort != null) {
            System.getProperties().put("proxySet", "true");
            System.getProperties().put("proxyHost", proxyHost);
            System.getProperties().put("proxyPort", proxyPort);
        }
        else {
            System.getProperties().put("proxySet", "false");
        }

        StringBuffer result = new StringBuffer();
        BufferedReader in = null;
        try {
            URL requestedUrl = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) requestedUrl.openConnection();
            System.out.println("user agent --> " + userAgent);
            conn.setRequestProperty("User-Agent", userAgent);
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("host", getHost(url));
            // The headers below make the request look as if it was relayed by a
            // WAP gateway (presumably so WAP sites serve their mobile pages).
            conn.setRequestProperty("accept", "text/html, application/xhtml+xml; profile=http://www.wapforum.org/xhtml, application/vnd.wap.xhtml+xml, text/vnd.wap.wml, application/vnd.wap.wmlc, application/vnd.wap.wbxml, application/vnd.wap.wmlscriptc, */*");
            conn.setRequestProperty("accept-charset", "utf-8, utf-16");
            conn.setRequestProperty("accept-language", "English, Chinese");
            conn.setRequestProperty("x-wap-profile", "http://gsm.lge.com/html/gsm/LG-U8138.xml");
            conn.setRequestProperty("via","(infoX WAP Gateway), HTTP/1.1, Huawei Technologies");
            conn.setRequestProperty("x-up-calling-line-id", "13910169234");
            conn.setRequestProperty("x-huawei-authmethod", "MSISDN");
            conn.setRequestProperty("x-forwarded-for", "10.77.15.185");
            conn.setRequestProperty("x-up-bear-type", "GPRS");
            conn.setRequestProperty("x-huawei-stacktype", "WAP2.0");
            conn.setRequestProperty("x-huawei-networktype", "GSM");
            conn.setRequestProperty("x-huawei-apn", "cmwap");
            conn.setRequestProperty("x-huawei-nasip", "211.137.197.73");

            // Read the header once and reuse it for both the record and the charset decision.
            String contentType = conn.getContentType();
            _content_type = contentType;
            boolean declaredUtf = contentType != null
                    && contentType.toLowerCase().indexOf(UTF_CHARSET) >= 0;

            in = openReader(conn, bForceUtf || declaredUtf);
            appendLines(in, result);
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
        return result.toString();
    }

    /**
     * Extracts the host part of a URL string.
     *
     * @param urlName the URL to parse
     * @return the host, or null when {@code urlName} is not a valid URL
     */
    public static String getHost(String urlName)
    {
        try {
            return new URL(urlName).getHost();
        }
        catch (MalformedURLException ignored) {
            // An unparsable URL simply has no host; callers get null.
            return null;
        }
    }

    /**
     * Manual smoke test: fetches a fixed URL through a fixed proxy and prints the body.
     */
    public static void main(String[] args)
    {
        String content = "";
        String proxyHost = "10.0.0.172";
        String proxyPort = "80";
        String url = "http://cota.cn/video";
        try {
            content = readUrlContentByProxy(url, proxyHost, proxyPort, false);
            System.out.println(content);
        } catch (Exception e){
            e.printStackTrace();
        }
    }

    /**
     * @return the Content-Type recorded by the last proxy-style fetch, or the value last set
     */
    public static String get_content_type() {
        return _content_type;
    }

    /**
     * Overrides the recorded Content-Type.
     *
     * @param _content_type the value to store
     */
    public static void set_content_type(String _content_type) {
        URLReader._content_type = _content_type;
    }

    /** Opens the response body of {@code conn} as a buffered reader, as UTF-8 or the platform default charset. */
    private static BufferedReader openReader(HttpURLConnection conn, boolean asUtf8) throws IOException
    {
        InputStream body = conn.getInputStream();
        if (asUtf8) {
            return new BufferedReader(new InputStreamReader(body, UTF_CHARSET));
        }
        return new BufferedReader(new InputStreamReader(body));
    }

    /** Appends every remaining line of {@code in} to {@code sink}, each followed by "\n". */
    private static void appendLines(BufferedReader in, StringBuffer sink) throws IOException
    {
        String line;
        while ((line = in.readLine()) != null) {
            sink.append(line);
            sink.append("\n");
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -