📄 testgetmethod.java
字号:
package cn.yicha.subject.spider.fecther;
import java.io.IOException;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import cn.yicha.subject.spider.store.*;
import cn.yicha.subject.spider.writer.NonPageLinkLog;
/**
 * A "GET" HTTP method that inspects the response headers and aborts the
 * request when the response is not a page.
 *
 * <p>A response counts as a page when its {@code Content-Type} media type
 * starts with {@code text/} (except the J2ME application descriptor type) or
 * with {@code application/vnd.wap}. For any other response, the URL, content
 * type, content length and last-modified value are recorded through
 * {@link NonPageLinkLog} and a {@link URIException} is thrown from
 * {@link #readResponseHeaders(HttpState, HttpConnection)}.
 *
 * @author zhangdi
 */
public class TestGetMethod extends HttpMethodBase {

    /** Media type of J2ME application descriptors; textual, but not a page. */
    private static final String J2ME_DESCRIPTOR_TYPE = "text/vnd.sun.j2me.app-descriptor";

    /** Prefix shared by all generic textual media types. */
    private static final String TEXT_PREFIX = "text/";

    /** Prefix of the WAP media types that are treated as pages. */
    private static final String WAP_PREFIX = "application/vnd.wap";

    /** URL this method was created with; empty when none was supplied. */
    private final String originalUrl;

    /** Creates a method without a URL and enables redirect following. */
    public TestGetMethod() {
        originalUrl = "";
        setFollowRedirects(true);
    }

    /**
     * Creates a method for the given URL and enables redirect following.
     *
     * @param uri the URL to request; it is remembered so that the logged URL
     *            is the one originally asked for
     */
    public TestGetMethod(String uri) {
        super(uri);
        originalUrl = (uri == null) ? "" : uri;
        setFollowRedirects(true);
    }

    /**
     * Returns the HTTP method name.
     *
     * @return always {@code "GET"}
     */
    public String getName() {
        return "GET";
    }

    /**
     * Decides whether a {@code Content-Type} header value denotes a page.
     *
     * <p>Only the media type is considered: parameters such as
     * {@code ; charset=UTF-8} are ignored and the comparison is
     * case-insensitive, because media types are case-insensitive.
     *
     * @param ct the {@code Content-Type} header value; may be empty or null
     * @return {@code true} for {@code text/*} (except the J2ME application
     *         descriptor) and {@code application/vnd.wap*}; {@code false}
     *         otherwise, including for an empty or null value
     */
    protected static boolean isPageLinkHeader(String ct) {
        if (ct == null) {
            return false;
        }
        // Drop any parameters so only "type/subtype" is compared.
        int paramStart = ct.indexOf(';');
        String mediaType = (paramStart >= 0 ? ct.substring(0, paramStart) : ct).trim();

        if (mediaType.equalsIgnoreCase(J2ME_DESCRIPTOR_TYPE)) {
            return false;
        }
        return hasPrefixIgnoreCase(mediaType, TEXT_PREFIX)
                || hasPrefixIgnoreCase(mediaType, WAP_PREFIX);
    }

    /** Returns whether {@code value} starts with {@code prefix}, ignoring case. */
    private static boolean hasPrefixIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Returns the trimmed value of the first header with the given name.
     *
     * @param hg   the header group to search
     * @param name the header name
     * @return the trimmed value of the first matching header, or an empty
     *         string when there is no such header or its value is null
     */
    protected static String getHeaderValueForName(HeaderGroup hg, String name) {
        if (hg.getHeaders(name).length == 0) {
            return "";
        }
        String value = hg.getHeaders(name)[0].getValue();
        return (value == null) ? "" : value.trim();
    }

    /**
     * Reads the response headers, then aborts the request if the response is
     * not a page.
     *
     * @param state the HTTP state, passed through to the superclass
     * @param conn  the connection, passed through to the superclass
     * @throws IOException   if the superclass fails to read the headers
     * @throws HttpException if the superclass fails, or (as a
     *                       {@link URIException}) when the response is not a
     *                       page; the response has been logged in that case
     */
    protected void readResponseHeaders(HttpState state, HttpConnection conn)
            throws IOException, HttpException {
        super.readResponseHeaders(state, conn);

        HeaderGroup hg = getResponseHeaderGroup();
        String ct = getHeaderValueForName(hg, "Content-Type");
        if (isPageLinkHeader(ct)) {
            return; // A page: let the request proceed normally.
        }

        // Prefer the URL this method was created with; when none was given
        // (no-arg constructor), fall back to the method's current URI.
        String loggedUrl = (originalUrl.length() > 0) ? originalUrl : getURI().getURI();

        // A non-page link: record its headers, then abort the request.
        HeaderContent hc = new HeaderContent();
        hc.set_uri(loggedUrl);
        hc.set_contentType(ct);
        hc.set_contentLen(getHeaderValueForName(hg, "Content-Length"));
        hc.set_lmt(getHeaderValueForName(hg, "Last-Modified"));
        NonPageLinkLog.add(hc);

        throw new URIException(
                "Non-page content type '" + ct + "' for URL: " + loggedUrl);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -