📄 login.java
字号:
package org.indigo.parser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;
import org.indigo.parser.Parser;
/**
 * HTTP helpers for fetching pages that sit behind a login form or that can
 * only be reached through a form POST (for example JavaScript/form driven
 * pagination).
 *
 * <p>Both entry points return {@code null} when the request fails with an
 * I/O or protocol error; the failure is reported on {@code System.err}.
 */
public class Login {

    /**
     * Page fetched by {@link #getHtmlByLogin} when the caller supplies no
     * target URL. Earlier revisions always fetched this address regardless of
     * the argument; it is kept only as a fallback for such callers.
     */
    private static final String DEFAULT_TARGET_URL = "http://www.iim.ac.cn/kaoqin3/new01.asp";

    /** Content charset configured on the clients used by the login flow. */
    private static final String LOGIN_CHARSET = "gb2312";

    /**
     * Logs in by posting {@code data} to {@code formUrl}, then fetches
     * {@code url} while presenting the cookies obtained from the login.
     *
     * @param formUrl value of the {@code action} attribute of the login form
     * @param data    login form parameters; {@code null} posts an empty form
     * @param url     URL of the page to fetch after logging in; when
     *                {@code null} or empty, {@link #DEFAULT_TARGET_URL} is used
     * @return the body of the target page, or {@code null} if either request
     *         fails with an I/O or protocol error
     */
    public static String getHtmlByLogin(String formUrl, NameValuePair[] data, String url) {
        String targetUrl = (url == null || url.length() == 0) ? DEFAULT_TARGET_URL : url;

        // Step 1: submit the login form and collect the session cookies.
        HttpClient loginClient = new HttpClient();
        loginClient.getParams().setContentCharset(LOGIN_CHARSET);
        PostMethod loginPost = new PostMethod(formUrl);
        if (data != null) {
            loginPost.setRequestBody(data);
        }

        Cookie[] cookies;
        try {
            int statusCode = loginClient.executeMethod(loginPost);
            // The login page body itself is not needed; only the cookies are.
            logRedirect(statusCode, loginPost);
            cookies = loginClient.getState().getCookies();
        } catch (IOException e) {
            // HttpException is a subclass of IOException, so this covers both.
            System.err.println("Login request to " + formUrl + " failed: " + e);
            return null;
        } finally {
            loginPost.releaseConnection();
        }

        // Step 2: request the target page with a client carrying those cookies.
        HttpState state = new HttpState();
        for (Cookie c : cookies) {
            state.addCookie(c);
        }
        HttpClient pageClient = new HttpClient();
        pageClient.setState(state);
        pageClient.getParams().setContentCharset(LOGIN_CHARSET);

        PostMethod pagePost = new PostMethod(targetUrl);
        try {
            pageClient.executeMethod(pagePost);
            return pagePost.getResponseBodyAsString();
        } catch (IOException e) {
            System.err.println("Request to " + targetUrl + " failed: " + e);
            return null;
        } finally {
            pagePost.releaseConnection();
        }
    }

    /**
     * Posts {@code data} to {@code url} and returns the response body. Use
     * this when paging is driven by JavaScript or a form submission.
     *
     * <p>Each line of the response is trimmed and the lines are concatenated
     * without separators.
     *
     * @param url  URL of the page to request
     * @param data form parameters to send; {@code null} posts an empty form
     * @return the concatenated, trimmed response lines; {@code null} if the
     *         response has no body or the request fails with an I/O or
     *         protocol error
     */
    public static String getHtmlByPost(String url, NameValuePair[] data) {
        HttpClient httpClient = new HttpClient();
        PostMethod postMethod = new PostMethod(url);
        postMethod.addRequestHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
        if (data != null) {
            postMethod.setRequestBody(data);
        }

        try {
            int statusCode = httpClient.executeMethod(postMethod);
            String html = readBody(postMethod);
            logRedirect(statusCode, postMethod);
            return html;
        } catch (IOException e) {
            // HttpException is a subclass of IOException, so this covers both.
            System.err.println("POST to " + url + " failed: " + e);
            return null;
        } finally {
            postMethod.releaseConnection();
        }
    }

    /**
     * Reads the response body of an executed method line by line, decoding it
     * with the charset reported by the response.
     *
     * @return the trimmed lines joined together, or {@code null} when the
     *         response has no body stream or contains no lines
     */
    private static String readBody(PostMethod method) throws IOException {
        InputStream in = method.getResponseBodyAsStream();
        if (in == null) {
            return null;
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, method.getResponseCharSet()));
        try {
            // Created lazily so that an empty body still yields null.
            StringBuilder html = null;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (html == null) {
                    html = new StringBuilder();
                }
                html.append(line.trim());
            }
            return html == null ? null : html.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Reports a 301/302 answer. HttpClient does not automatically follow
     * redirects for entity-enclosing requests such as POST and PUT, so the
     * redirect target is only logged here, not followed.
     */
    private static void logRedirect(int statusCode, PostMethod method) {
        if (statusCode != HttpStatus.SC_MOVED_PERMANENTLY && statusCode != HttpStatus.SC_MOVED_TEMPORARILY) {
            return;
        }
        // Take the redirect target from the response headers.
        Header locationHeader = method.getResponseHeader("location");
        if (locationHeader != null) {
            System.out.println("The page was redirected to:" + locationHeader.getValue());
        } else {
            System.err.println("Location field value is null.");
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -