📄 makeupurl.java
字号:
/*
* FileName MakeUpUrl.java
* Create Time 2006-5-17 17:06:05
* Author shiwei
* Descript Organizes (classifies) the URLs of the current page
* Version
*/
package com.snoics.reptile.regex.url.impl;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import com.snoics.reptile.link.createUrl.BuildUrl;
import com.snoics.reptile.link.createUrl.IBuildUrl;
import com.snoics.reptile.regex.filter.DownloadUrlFilter;
import com.snoics.reptile.regex.filter.ForbidUrlFilter;
import com.snoics.reptile.regex.filter.IRegexFilter;
import com.snoics.reptile.regex.filter.RangeUrlFilter;
import com.snoics.reptile.regex.filter.RemoteUrlFilter;
import com.snoics.reptile.regex.filter.UnDownloadUrlFilter;
import com.snoics.reptile.regex.url.IFilterAllUrl;
import com.snoics.reptile.regex.url.IMakeUpUrl;
import com.snoics.reptile.util.UrlUtil;
/**
 * Classifies the URLs of the current page into separate lists.
 *
 * <p>Each URL from {@link #setAllHtmlUrl(Set)} is resolved against the parent
 * URL and then tested against a fixed sequence of filters. The first filter
 * that accepts the resolved URL decides which list the URL is stored in.
 * The value stored is the original URL as it appeared in the set, not the
 * resolved one.
 */
public class MakeUpUrl implements IMakeUpUrl {

    private String parentUrl = "";
    private Set allHtmlUrl = null;
    private List remoteUrl = new ArrayList();
    private List rangeUrl = new ArrayList();
    private List forbidUrl = new ArrayList();
    private List downloadUrl = new ArrayList();
    private List unDownloadUrl = new ArrayList();
    private List urlRegexList = new ArrayList();
    private String htmlString = "";
    private IBuildUrl buildUrl = new BuildUrl();

    /**
     * Returns the URL of the page the links are resolved against.
     * @return String
     */
    public String getParentUrl() {
        return this.parentUrl;
    }

    /**
     * Sets the URL of the page the links are resolved against.
     * @param parentUrl
     */
    public void setParentUrl(String parentUrl) {
        this.parentUrl = parentUrl;
    }

    /**
     * Returns the HTML text previously stored with {@link #setHtmlString(String)}.
     * @return String
     */
    public String getHtmlString() {
        return this.htmlString;
    }

    /**
     * Stores the HTML text of the page.
     * @param htmlString
     */
    public void setHtmlString(String htmlString) {
        this.htmlString = htmlString;
    }

    /**
     * Sets all URLs of the current page.
     * @param allHtmlUrl
     */
    public void setAllHtmlUrl(Set allHtmlUrl) {
        this.allHtmlUrl = allHtmlUrl;
    }

    /**
     * Gets all URLs of the current page.
     * @return Set
     */
    public Set getAllHtmlUrl() {
        return this.allHtmlUrl;
    }

    /**
     * Gets all remote URLs.
     * @return List
     */
    public List getRemoteUrl() {
        return this.remoteUrl;
    }

    /**
     * Gets the URLs that are inside the parsing range.
     * @return List
     */
    public List getRangeUrl() {
        return this.rangeUrl;
    }

    /**
     * Gets the URLs that are inside the parsing range but must not be fetched.
     * @return List
     */
    public List getForbidUrl() {
        return this.forbidUrl;
    }

    /**
     * Gets the binary-file URLs that will not be fetched.
     * @return List
     */
    public List getUnDownloadUrl() {
        return this.unDownloadUrl;
    }

    /**
     * Gets the binary-file URLs that will be fetched to local storage.
     * @return List
     */
    public List getDownloadUrl() {
        return this.downloadUrl;
    }

    /**
     * Gets the regular expressions used to parse the current page.
     * @return List
     */
    public List getUrlRegexList() {
        return this.urlRegexList;
    }

    /**
     * Sets the regular expressions used to parse the current page.
     * @param urlRegexList
     */
    public void setUrlRegexList(List urlRegexList) {
        this.urlRegexList = urlRegexList;
    }

    /**
     * Performs the classification.
     *
     * <p>Does nothing when no URL set has been supplied. Results are appended
     * to the existing lists; a URL accepted by none of the filters is not
     * stored anywhere.
     */
    public void makeUp() {
        if (this.allHtmlUrl == null) {
            return;
        }
        for (Iterator urls = this.allHtmlUrl.iterator(); urls.hasNext();) {
            String pageUrl = (String) urls.next();
            String resolvedUrl = this.buildUrl.buildResolvedUrl(pageUrl, this.parentUrl);
            List target = chooseTargetList(resolvedUrl);
            if (target != null) {
                // The original (unresolved) URL is what gets recorded.
                target.add(pageUrl);
            }
        }
    }

    /**
     * Picks the list a resolved URL belongs to. Filters are tried in a fixed
     * order and each one is only created once all earlier ones have rejected
     * the URL.
     *
     * @param resolvedUrl the URL after resolution against the parent URL
     * @return the matching list, or null when no filter accepts the URL
     */
    private List chooseTargetList(String resolvedUrl) {
        // Binary file type that should be downloaded to local storage.
        if (accepts(new DownloadUrlFilter(), resolvedUrl)) {
            return this.downloadUrl;
        }
        // Remote URL.
        if (accepts(new RemoteUrlFilter(), resolvedUrl)) {
            return this.remoteUrl;
        }
        // Binary file type that should not be downloaded.
        if (accepts(new UnDownloadUrlFilter(), resolvedUrl)) {
            return this.unDownloadUrl;
        }
        // Page that is forbidden from being fetched.
        if (accepts(new ForbidUrlFilter(), resolvedUrl)) {
            return this.forbidUrl;
        }
        // Page that lies inside the fetch range.
        if (accepts(new RangeUrlFilter(), resolvedUrl)) {
            return this.rangeUrl;
        }
        return null;
    }

    /**
     * Hands the URL to the filter and reports the filter's verdict.
     *
     * @param filter the filter to run
     * @param url the URL to test
     * @return the result of {@code filter.filter()}
     */
    private static boolean accepts(IRegexFilter filter, String url) {
        filter.setUrl(url);
        return filter.filter();
    }

    /**
     * Prints every result list to standard output.
     */
    private void showInfo() {
        System.out.println("getDownloadUrl=" + getDownloadUrl());
        System.out.println("getForbidUrl=" + getForbidUrl());
        System.out.println("getRangeUrl=" + getRangeUrl());
        System.out.println("getRemoteUrl=" + getRemoteUrl());
        System.out.println("getUnDownloadUrl=" + getUnDownloadUrl());
    }

    /**
     * Manual smoke run: fetches one page, extracts its URLs, classifies them
     * and prints the result lists.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://192.168.0.1/index.jsp";
        UrlUtil urlRegexUtil = new UrlUtil();
        String htmlString = urlRegexUtil.getHtmlString(url);

        IFilterAllUrl filterAllUrl = new FilterAllUrl();
        filterAllUrl.setHtmlString(htmlString);
        Set pageUrls = filterAllUrl.getHtmlUrl();

        MakeUpUrl makeUpUrl = new MakeUpUrl();
        makeUpUrl.setParentUrl(url);
        makeUpUrl.setAllHtmlUrl(pageUrls);
        makeUpUrl.makeUp();
        makeUpUrl.showInfo();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -