//File: catchpage.java
//Happy new Year 2003
package BBSSpider;
import java.io.*;
import java.net.*;
import java.util.*;
//Title: Your Product Name
//Version:
//Copyright: Copyright (c) 1999
//Author: Your Name
//Company: Your Company
//Description:
public class CatchPage {
// ---- Run parameters -------------------------------------------------------
// Cutoff date: the constructor stores the caller's value, or "now minus two
// hours" when the caller passes null.
Date latestDate;
// BBS identifier; inint() uses it to build the template file name
// ".\BBSTemplate\<bbsString>.properties".
String bbsString;
// Board identifier; inint() appends it to the "EntryURL" property value.
String boardString;
// ---- Values loaded from the template properties file by inint() ------------
// Each field below (unless noted otherwise) is read from the property of the
// same name. Several are additionally passed through BotTool.getChineseStr();
// what that helper does is not visible in this file.
String URLbase;
// "EntryURL" property value with boardString appended.
String EntryURL;
// Not read from the properties file: inint() sets these to the fixed relative
// paths ".\BBSArticle\" and ".\ErrorBBSArticle\".
String dirPath;
String errPath;
// Page-level masks (presumably markers for locating paging links in fetched
// HTML -- usage is not visible here; confirm against catchItemPage/CatchThread).
String pageMask;
String pageHerfMask;
String pageHerfBeginMask;
String pageHerfEndMask;
//String URLbase=new String("http://fm365.federal.com.cn/");
//String CategoryMask=new String("searchresult.asp");
// Counter reset to 0 by inint(); its meaning is not visible in this section.
int Num;
// Article-list masks.
String articleMask;
String articleTitleMask;
String articleTitleNoneMask;
String articleTitleBeginMask;
String articleTitleEndMask;
String articleHerfMask;
String articleHerfNoneMask;
String articleHerfBeginMask;
String articleHerfEndMask;
String articleHerfTitleBeginMask;
String articleHerfTitleEndMask;
String articleDivider;
String articleRedirectionMask;
String articleRedirectionBeginMask;
String articleRedirectionEndMask;
// Article-content masks.
String contentMask;
String contentBeginMask;
String contentEndMask;
String contentEndMask2;
String tidyBeginTag;
String tidyEndTag;
// Article metadata masks: title, author, alias, date, IP.
String titleMask;
String titleBeginMask;
String titleEndMask;
String authorMask;
String authorBeginMask;
String authorEndMask;
String aliasMask;
String aliasBeginMask;
String aliasEndMask;
String dateMask;
String dateBeginMask;
String dateEndMask;
String dateStrBeginMask;
String dateStrEndMask;
String dateRepresentForm;
String ipMask;
String ipBeginMask;
String ipEndMask;
// ---- Runtime state ---------------------------------------------------------
// Encoding name; defaults to "GB2312" and is not reassigned in this section.
String encoderStr="GB2312";
// Created (empty) by inint(); presumably holds pending URLs -- confirm with users.
Vector urlVector;
Date firstArticleDate=null;
// NOTE(review): this initializer runs at construction time, when EntryURL has
// no value yet (it is only assigned in inint()), so nowPageUrl starts as null
// rather than as the entry URL. Confirm whether that is intended.
String nowPageUrl=EntryURL;
int nowPageUrlTimes=0;
// Number of worker threads; the default of 15 is always overwritten by the
// constructor argument.
int threadNum=15;
boolean PageOk = true;
// Worker threads; inint() allocates threadNum of them and starts each one.
CatchThread catThread[];
/**
 * Creates a page catcher for one board of one BBS.
 *
 * Only stores its arguments; the template is not loaded and no threads are
 * started until {@link #inint()} is called.
 *
 * @param theLatestDate  cutoff date stored in {@code latestDate}; when
 *                       {@code null}, the current time minus two hours is used
 * @param theBBSString   BBS identifier stored in {@code bbsString}
 * @param theBoardString board identifier stored in {@code boardString}
 * @param theThreadNum   number of worker threads stored in {@code threadNum}
 */
public CatchPage(Date theLatestDate,String theBBSString,String theBoardString,int theThreadNum) {
    bbsString = theBBSString;
    boardString = theBoardString;
    threadNum = theThreadNum;

    if (theLatestDate != null) {
        latestDate = theLatestDate;
        return;
    }

    // No cutoff supplied: fall back to two hours before now.
    GregorianCalendar cutoff = new GregorianCalendar();
    cutoff.add(Calendar.HOUR_OF_DAY, -2);
    latestDate = cutoff.getTime();
}
/**
 * Loads the parsing template for this BBS and starts the worker threads.
 *
 * Reads ".\BBSTemplate\&lt;bbsString&gt;.properties", copies its entries into
 * the mask fields of this object, resets {@code urlVector} and {@code Num},
 * then creates and starts {@code threadNum} {@code CatchThread} workers.
 *
 * If the template file cannot be opened or parsed, the error is written to
 * {@code System.err} and the method returns without touching any field and
 * without starting any thread.
 *
 * Some values are passed through {@code BotTool.getChineseStr}; presumably
 * this fixes up the encoding of Chinese text -- confirm in BotTool.
 */
public void inint()
{
    String propertiesFileName=".\\BBSTemplate\\"+bbsString+".properties";
    Properties catchProps = new Properties();
    FileInputStream is = null;
    try {
        is = new FileInputStream(propertiesFileName);
        catchProps.load(is);
    }
    catch (Exception e) {
        // Covers a missing file (FileNotFoundException), read errors and
        // malformed escapes. Abort here instead of continuing with a null
        // stream and relying on the resulting NullPointerException.
        System.err.println(e.toString());
        return;
    }
    finally {
        // Always release the file handle; the stream was previously leaked.
        if (is != null) {
            try {
                is.close();
            }
            catch (IOException ignored) {
                // Best-effort close: the properties are already loaded (or
                // the failure has already been reported above).
            }
        }
    }

    // Site entry points.
    URLbase = catchProps.getProperty("URLbase");
    EntryURL =catchProps.getProperty("EntryURL")+boardString;

    // Article-list masks.
    articleMask =catchProps.getProperty("articleMask");
    articleHerfMask =catchProps.getProperty("articleHerfMask");
    articleHerfNoneMask =catchProps.getProperty("articleHerfNoneMask");
    articleHerfBeginMask=catchProps.getProperty("articleHerfBeginMask");
    articleHerfEndMask=catchProps.getProperty("articleHerfEndMask");
    articleTitleMask =BotTool.getChineseStr(catchProps.getProperty("articleTitleMask"));
    articleTitleNoneMask =BotTool.getChineseStr(catchProps.getProperty("articleTitleNoneMask"));
    articleTitleBeginMask=catchProps.getProperty("articleTitleBeginMask");
    articleTitleEndMask=catchProps.getProperty("articleTitleEndMask");
    articleHerfTitleBeginMask=catchProps.getProperty("articleHerfTitleBeginMask");
    articleHerfTitleEndMask=catchProps.getProperty("articleHerfTitleEndMask");

    // Output directories are fixed, not configurable through the template.
    dirPath = ".\\BBSArticle\\";
    errPath=".\\ErrorBBSArticle\\";

    // Paging masks.
    pageMask =BotTool.getChineseStr(catchProps.getProperty("pageMask"));
    pageHerfMask =catchProps.getProperty("pageHerfMask");
    pageHerfBeginMask=catchProps.getProperty("pageHerfBeginMask");
    pageHerfEndMask=catchProps.getProperty("pageHerfEndMask");

    // Article separation and redirection masks.
    articleDivider=BotTool.getChineseStr(catchProps.getProperty("articleDivider"));
    articleRedirectionMask =catchProps.getProperty("articleRedirectionMask");
    articleRedirectionBeginMask=catchProps.getProperty("articleRedirectionBeginMask");
    articleRedirectionEndMask=catchProps.getProperty("articleRedirectionEndMask");

    // Article-content masks.
    contentMask=BotTool.getChineseStr(catchProps.getProperty("contentMask"));
    contentBeginMask=catchProps.getProperty("contentBeginMask");
    contentEndMask=catchProps.getProperty("contentEndMask");
    contentEndMask2=BotTool.getChineseStr(catchProps.getProperty("contentEndMask2"));
    tidyBeginTag=catchProps.getProperty("tidyBeginTag");
    tidyEndTag=catchProps.getProperty("tidyEndTag");

    // Article metadata masks: title, author, alias, date, IP.
    titleMask=BotTool.getChineseStr(catchProps.getProperty("titleMask"));
    titleBeginMask=catchProps.getProperty("titleBeginMask");
    titleEndMask=catchProps.getProperty("titleEndMask");
    authorMask=BotTool.getChineseStr(catchProps.getProperty("authorMask"));
    authorBeginMask=catchProps.getProperty("authorBeginMask");
    authorEndMask=catchProps.getProperty("authorEndMask");
    aliasMask=BotTool.getChineseStr(catchProps.getProperty("aliasMask"));
    aliasBeginMask=catchProps.getProperty("aliasBeginMask");
    aliasEndMask=catchProps.getProperty("aliasEndMask");
    dateMask=BotTool.getChineseStr(catchProps.getProperty("dateMask"));
    dateBeginMask=catchProps.getProperty("dateBeginMask");
    dateEndMask=catchProps.getProperty("dateEndMask");
    dateStrBeginMask=catchProps.getProperty("dateStrBeginMask");
    dateStrEndMask=BotTool.getChineseStr(catchProps.getProperty("dateStrEndMask"));
    dateRepresentForm=BotTool.getChineseStr(catchProps.getProperty("dateRepresentForm"));
    ipMask=catchProps.getProperty("ipMask");
    ipBeginMask=catchProps.getProperty("ipBeginMask");
    ipEndMask=catchProps.getProperty("ipEndMask");

    // Fresh runtime state for this run.
    urlVector = new Vector();
    Num = 0;

    // Start the workers only after every field above is populated, since each
    // CatchThread receives a reference to this object.
    catThread = new CatchThread[threadNum];
    for (int i=0;i<threadNum;i++){
        catThread[i]=new CatchThread(this,Integer.toString(i));
        catThread[i].start();
    }
}
public boolean catchItemPage() {
/*try {
BotTool.doGet("http://bbs.pku.edu.cn/cgi-bin/bbsdoc?dig=0&cookie_string=&board=DIY");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -