📄 getpagestr.java
字号:
/*===================================================================================
Copyright (c) 2004-2006 by www.koubei.com, All rights reserved.
8f., HuaXing technology building, 477# wensan road, HangZhou, China
This software is the confidential and proprietary information of
Koubei.com, Inc. ("Confidential Information"). You shall not disclose
such Confidential Information and shall use it only in accordance
with the terms of the license agreement you entered into with Koubei.com, Inc.
===================================================================================
File name: GetPageStr.java
Author: 公孙策
Date: 2007-1-24
Description: Utilities for fetching web page content over HTTP (GET/POST) and saving remote images to disk.
Others:
Function List:
1. ...
History:
===================================================================================*/
package HtmlReaderJ;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
 * Utility class for fetching web page content and remote images over HTTP.
 *
 * <p>All methods are static and best-effort: network and encoding failures are
 * signalled through the return value (null, empty string or false) instead of
 * being thrown to the caller.
 *
 * @author 公孙策
 * @version 1.0
 */
public class GetPageStr {

    /** Size in bytes/chars of the buffers used when copying stream data. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Demo entry point: fetches a fixed search-result page with
     * {@link #getWebContentGetMethod(String, String)}, hands the result to
     * FileControl.writeFile with target path c:/1.html, and echoes it to
     * standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("begin");
        String str = GetPageStr.getWebContentGetMethod("http://info.china.alibaba.com/news/subjsearchbykewordrss.htm?keywords=风水&subjectid=5018260" , "GB2312");
        FileControl.writeFile("c:/1.html" , str , 'o') ;
        System.out.println(str);
        System.out.println("end");
    }

    /**
     * Fetches a page with an HTTP GET request (Commons HttpClient).
     *
     * <p>A non-200 status is reported on standard error, but the response body
     * is still decoded and returned.
     *
     * @param url    address to fetch
     * @param coding charset name used to decode the response body; when null or
     *               empty the platform default charset is used
     * @return the decoded response body, or null when the URL is empty or
     *         invalid, the request fails, or the response has no body
     */
    public static String getWebContentGetMethod(String url, String coding) {
        if (StringProcessor.isEmpty(url)) {
            return null;
        }
        HttpClient httpClient = new HttpClient();
        GetMethod getMethod;
        try {
            getMethod = new GetMethod(url);
        } catch (IllegalArgumentException e) {
            // The constructor rejects syntactically invalid URIs.
            reportFailure("GET", url, e);
            return null;
        }
        // Use the default retry/recovery policy provided by HttpClient.
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler());
        try {
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: "
                        + getMethod.getStatusLine());
            }
            return decode(getMethod.getResponseBody(), coding);
        } catch (HttpException e) {
            // Fatal protocol-level problem: wrong protocol or malformed response.
            reportFailure("GET", url, e);
        } catch (IOException e) {
            // Network failure or unsupported charset name.
            reportFailure("GET", url, e);
        } finally {
            // Always hand the connection back.
            getMethod.releaseConnection();
        }
        return null;
    }

    /**
     * Encodes the string to UTF-8 bytes and decodes those bytes as GBK.
     *
     * <p>Note that this reinterprets bytes; it is not a lossless transcoding
     * of the text.
     *
     * @param s source string, may be null
     * @return the re-decoded string; null when {@code s} is null; {@code s}
     *         unchanged when either charset is unsupported
     */
    public static String GBK(String s) {
        if (s == null) {
            return null;
        }
        try {
            return new String(s.getBytes("UTF-8"), "GBK");
        } catch (UnsupportedEncodingException e) {
            return s;
        }
    }

    /**
     * Fetches a page with an HTTP POST request (Commons HttpClient).
     *
     * <p>HttpClient does not follow redirects automatically for POST, so a
     * 301, 302 or 303 response with a Location header is followed here with a
     * GET request. A relative Location is resolved against {@code url}.
     *
     * @param url    address to post to
     * @param data   form field values, e.g.
     *               { new NameValuePair("id", "youUserName"),
     *                 new NameValuePair("passwd", "yourPwd") };
     *               null sends no form fields
     * @param coding charset name used to decode the response body; when null or
     *               empty the platform default charset is used
     * @return the decoded page content, or null when the URL is empty or
     *         invalid, the request fails, or the response has no body
     */
    public static String getWebContentPostMethod(String url, NameValuePair[] data, String coding) {
        if (StringProcessor.isEmpty(url)) {
            return null;
        }
        HttpClient httpClient = new HttpClient();
        PostMethod postMethod;
        try {
            postMethod = new PostMethod(url);
        } catch (IllegalArgumentException e) {
            // The constructor rejects syntactically invalid URIs.
            reportFailure("POST", url, e);
            return null;
        }
        // setRequestBody rejects a null array, so only set it when present.
        if (data != null) {
            postMethod.setRequestBody(data);
        }
        try {
            int statusCode = httpClient.executeMethod(postMethod);
            if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                    || statusCode == HttpStatus.SC_MOVED_TEMPORARILY
                    || statusCode == HttpStatus.SC_SEE_OTHER) {
                // Take the redirect target from the response headers.
                Header locationHeader = postMethod.getResponseHeader("location");
                String location = (locationHeader == null) ? null : locationHeader.getValue();
                if (location != null) {
                    // Resolve against the request URL so relative targets work.
                    String target = new URL(new URL(url), location).toString();
                    return getWebContentGetMethod(target, coding);
                }
                System.err.println("Location field value is null.");
            }
            return decode(postMethod.getResponseBody(), coding);
        } catch (HttpException e) {
            // Fatal protocol-level problem: wrong protocol or malformed response.
            System.out.println("Please check your provided http address!");
            e.printStackTrace();
        } catch (IOException e) {
            // Network failure, malformed redirect URL or unsupported charset name.
            e.printStackTrace();
        } finally {
            // Always hand the connection back.
            postMethod.releaseConnection();
        }
        return null;
    }

    /**
     * Fetches a page with {@link HttpURLConnection}, decoding the response
     * with the platform default charset.
     *
     * @param theURL address to fetch
     * @return the page content, or an empty string when anything fails
     */
    public static String getWebContent(String theURL) {
        return getWebContent(theURL, null);
    }

    /**
     * Fetches a page with {@link HttpURLConnection} using a 5 second connect
     * timeout.
     *
     * @param theURL address to fetch
     * @param coding charset name used to decode the response; when null or
     *               empty the platform default charset is used
     * @return the page content, or an empty string when anything fails
     *         (the failure is printed to standard output)
     */
    public static String getWebContent(String theURL, String coding) {
        String content = "";
        HttpURLConnection connection = null;
        java.io.InputStream stream = null;
        BufferedReader reader = null;
        try {
            connection = (HttpURLConnection) new URL(theURL).openConnection();
            connection.setConnectTimeout(5000);
            connection.connect();
            stream = connection.getInputStream();
            InputStreamReader decoder = (coding == null || coding.length() == 0)
                    ? new InputStreamReader(stream)
                    : new InputStreamReader(stream, coding);
            reader = new BufferedReader(decoder);
            char[] buffer = new char[BUFFER_SIZE];
            StringBuilder sb = new StringBuilder();
            int readCount;
            while ((readCount = reader.read(buffer, 0, buffer.length)) != -1) {
                sb.append(buffer, 0, readCount);
            }
            // Only publish the text once the whole response was read.
            content = sb.toString();
        } catch (Exception e) {
            System.out.println(e.toString());
        } finally {
            // Single cleanup path for both success and failure.
            closeQuietly(reader);
            closeQuietly(stream);
            if (connection != null) {
                connection.disconnect();
            }
        }
        return content;
    }

    /**
     * Downloads a remote resource (e.g. an image) and saves it to a local file.
     *
     * @param urlstr   address of the remote resource
     * @param savepath local file path to write to
     * @return true when the whole resource was written; false when either
     *         argument is empty or any error occurs (a partially written file
     *         may remain on disk)
     */
    public static boolean saveImageLD(String urlstr, String savepath) {
        if (StringProcessor.isEmpty(urlstr) || StringProcessor.isEmpty(savepath)) {
            return false;
        }
        java.io.InputStream in = null;
        FileOutputStream out = null;
        try {
            URLConnection urlConnection = new URL(urlstr).openConnection();
            urlConnection.connect();
            in = urlConnection.getInputStream();
            out = new FileOutputStream(savepath);
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            // Close here so that a failing close is reported as a failed save.
            out.close();
            return true;
        } catch (Exception ex) {
            return false;
        } finally {
            // Release both streams even when the copy failed part-way.
            closeQuietly(out);
            closeQuietly(in);
        }
    }

    /**
     * Decodes a response body.
     *
     * @param body   raw bytes, may be null
     * @param coding charset name; null or empty selects the platform default
     * @return the decoded text, or null when {@code body} is null
     * @throws UnsupportedEncodingException when {@code coding} is not supported
     */
    private static String decode(byte[] body, String coding) throws UnsupportedEncodingException {
        if (body == null) {
            return null;
        }
        if (coding == null || coding.length() == 0) {
            return new String(body);
        }
        return new String(body, coding);
    }

    /** Prints a one-line description of a failed request to standard error. */
    private static void reportFailure(String action, String url, Exception e) {
        System.err.println(action + " " + url + " failed: " + e);
    }

    /** Closes the resource if it is non-null, ignoring any close failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource != null) {
            try {
                resource.close();
            } catch (IOException ignored) {
                // Best-effort cleanup; nothing useful can be done here.
            }
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -