📄 catchpage.java
字号:
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use Options | File Templates.
}*/
String listPage = "";
try {
listPage = BotTool.doGet(EntryURL);
} catch (IOException e) {
//To change body of catch statement use Options | File Templates.
System.out.println("exception: " + e.toString());
return false;
}
if (listPage.equals("")) {
System.out.println("listPage is error");
return false;
}
do {
if ((listPage != null) && (!listPage.equals(""))) {
System.out.println("ready to get Items on page"+nowPageUrl);
catchItemOnPage(listPage);
if (PageOk) {
String tempPage = listPage;
listPage = this.getNextPage(listPage);
if (tempPage.equalsIgnoreCase(listPage)) {
PageOk = false;
}
}
} else {
if(nowPageUrlTimes>10){
System.err.println("listPage is still empty,over time to finish");
return false;
}
System.err.println("listPage is empty,Try to go to page:"+nowPageUrl+"\nfor the "+Integer.toString(
++nowPageUrlTimes)+" time");
try {
listPage = BotTool.doGet(nowPageUrl);
} catch (IOException e) {
//To change body of catch statement use Options | File Templates.
System.out.println("exception: " + e.toString());
}
}
if (firstArticleDate!=null){
System.out.println("firstdate = " + BotTool.Date2String(firstArticleDate,"yyyy-MM-dd HH:mm:ss"));
}
} while (PageOk);
return true;
}
/**
 * Finds the "next page" link in a list page and downloads the page it points to.
 *
 * <p>The link is located by searching for {@code pageMask}, walking backwards from there to
 * the nearest {@code pageHerfMask}, and taking the text between the following
 * {@code pageHerfBeginMask} and {@code pageHerfEndMask}. That text is appended to
 * {@code URLbase} to form the URL. Once a URL has been extracted, {@code nowPageUrl} is set
 * to it and {@code nowPageUrlTimes} is reset to 0 before the download is attempted.
 *
 * @param listPage content of the current list page; may be {@code null}
 * @return the content returned by {@code BotTool.doGet} for the next page, or {@code null}
 *         when {@code listPage} is {@code null}, when no complete next-page link can be
 *         found in it, or when the download fails
 */
public String getNextPage(String listPage) {
    if (listPage == null) {
        return null;
    }
    int maskIdx = listPage.indexOf(pageMask, 0);
    if (maskIdx == -1) {
        return null;
    }

    // Every marker must be present. An unchecked -1 would either restart the search from the
    // beginning of the page (picking up an unrelated link) or make substring() throw.
    int hrefIdx = listPage.lastIndexOf(pageHerfMask, maskIdx);
    if (hrefIdx == -1) {
        System.out.println("next page link not found: href mask is missing");
        return null;
    }
    int beginIdx = listPage.indexOf(pageHerfBeginMask, hrefIdx);
    if (beginIdx == -1) {
        System.out.println("next page link not found: begin mask is missing");
        return null;
    }
    int endIdx = listPage.indexOf(pageHerfEndMask, beginIdx + 1);
    if (endIdx == -1) {
        System.out.println("next page link not found: end mask is missing");
        return null;
    }

    // NOTE(review): only one character is skipped after beginIdx, which presumes
    // pageHerfBeginMask is a single character - confirm against the configured masks.
    String relativeUrl = listPage.substring(beginIdx + 1, endIdx);
    String itemPageUrl = URLbase + relativeUrl;
    String itemPage = null;
    try {
        System.out.println("Go to Page:" + itemPageUrl);
        // Remember the target before fetching so it is still recorded if the fetch fails.
        nowPageUrl = itemPageUrl;
        nowPageUrlTimes = 0;
        itemPage = BotTool.doGet(itemPageUrl);
    } catch (Exception e) {
        // Best effort: a failed download is reported and signalled with a null result.
        System.out.println("此页面没有读出来");
        System.out.println("exception: " + e.toString());
    }
    return itemPage;
}
/**
 * Collects article URLs from a list page into {@code urlVector}.
 *
 * <p>The page is scanned from its end towards its start for occurrences of
 * {@code articleMask}. For each occurrence the link text between
 * {@code articleHerfBeginMask} and {@code articleHerfEndMask} (searched after the next
 * {@code articleHerfMask}) is extracted and appended to {@code URLbase}. When
 * {@code articleHerfTitleBeginMask} is non-empty, the part of the link delimited by the
 * title masks is URL-encoded with {@code encoderStr} first.
 *
 * <p>The URL is stored unless it contains a non-empty {@code articleHerfNoneMask}. When
 * {@code articleTitleMask} is non-empty, the article title must also contain
 * {@code articleTitleMask} and must not contain a non-empty {@code articleTitleNoneMask}.
 *
 * <p>Any exception raised while handling one occurrence is printed and the scan moves on.
 * {@code Num} is incremented once per occurrence, whether or not a URL was stored.
 *
 * @param listPage content of the list page to scan
 */
public void catchItemOnPage(String listPage) {
    // Declared outside the loop: the error message below reports the most recently built
    // URL, which may come from an earlier occurrence.
    String itemPageUrl = "";
    int cursor = listPage.lastIndexOf(articleMask, listPage.length());
    while (cursor != -1) {
        try {
            int hrefAt = listPage.indexOf(articleHerfMask, cursor);
            int linkStart = listPage.indexOf(articleHerfBeginMask, hrefAt);
            int linkEnd = listPage.indexOf(articleHerfEndMask, linkStart + 1);
            String link = listPage.substring(linkStart + articleHerfBeginMask.length(), linkEnd);

            if (!articleHerfTitleBeginMask.equals("")) {
                // Encode the title fragment embedded in the link.
                int rawStart = link.indexOf(articleHerfTitleBeginMask);
                int rawEnd = link.indexOf(articleHerfTitleEndMask, rawStart + 1);
                String rawTitle = link.substring(rawStart + articleHerfTitleBeginMask.length(), rawEnd);
                try {
                    link = BotTool.replaceString(link, rawTitle, URLEncoder.encode(rawTitle, encoderStr));
                } catch (UnsupportedEncodingException e) {
                    // The link is used unencoded when the encoding name is not supported.
                    e.printStackTrace();
                }
            }

            itemPageUrl = URLbase + link;

            if (articleTitleMask.equals("")) {
                // No title filter configured: only the URL exclusion mask applies.
                if (!articleHerfNoneMask.equals("") && itemPageUrl.indexOf(articleHerfNoneMask) != -1) {
                    System.out.println("Article's URL:"+itemPageUrl+" is not put into urlVector because its URL isn't needed");
                } else {
                    System.out.println("URL:"+itemPageUrl+" Has been put into urlVector");
                    urlVector.add(itemPageUrl);
                }
            } else {
                int titleStart = listPage.indexOf(articleTitleBeginMask, hrefAt);
                int titleEnd = listPage.indexOf(articleTitleEndMask, titleStart + 1);
                String titleString = listPage.substring(titleStart + articleTitleBeginMask.length(), titleEnd);

                // Titles that do not contain the wanted mask are skipped without a message.
                if (titleString.indexOf(articleTitleMask) != -1) {
                    if (!articleTitleNoneMask.equals("") && titleString.indexOf(articleTitleNoneMask) != -1) {
                        System.out.println("Article:"+titleString+" is not put into urlVector because its TITLE isn't needed");
                    } else if (!articleHerfNoneMask.equals("") && itemPageUrl.indexOf(articleHerfNoneMask) != -1) {
                        System.out.println("Article:"+titleString+" is not put into urlVector because its URL isn't needed");
                    } else {
                        System.out.println("Article:"+titleString+" Has been put into urlVector");
                        urlVector.add(itemPageUrl);
                    }
                }
            }
        } catch (Exception exp) {
            System.out.println("Exception happened when PageUrl near "+itemPageUrl+"\n"+exp.toString());
        }
        Num++;
        // Continue with the next occurrence that ends before the one just handled.
        cursor = listPage.lastIndexOf(articleMask, cursor - articleMask.length());
    }
}
/**
 * Removes the first element of {@code urlVector} and returns it.
 *
 * @return the removed element, cast to {@code String}
 */
public synchronized String getUrlFromVector() {
    Object head = urlVector.remove(0);
    return (String) head;
}
/**
 * Stores {@code date} in {@code firstArticleDate} when no date is stored yet or when
 * {@code date} is after the stored one, printing the accepted date between two banner lines.
 *
 * @param date candidate date
 */
public synchronized void updateFirstArticleDate(Date date) {
    // Keep the stored date when the candidate is not later than it.
    if (firstArticleDate != null && !date.after(firstArticleDate)) {
        return;
    }
    System.out.println("===Date===========");
    System.out.println(date);
    System.out.println("===Date===========");
    firstArticleDate = date;
}
/**
 * Reports whether {@code urlVector} currently holds no elements.
 *
 * @return {@code true} when {@code urlVector} is empty
 */
public boolean noURL() {
    boolean empty = urlVector.isEmpty();
    return empty;
}
/**
 * Sets the {@code PageOk} flag.
 *
 * @param pageOk new value of the flag
 */
public void setPageOk(boolean pageOk) {
    this.PageOk = pageOk;
}
/**
 * Returns the value of {@code dirPath}.
 *
 * @return the current {@code dirPath}
 */
public String getDirPath() {
    return this.dirPath;
}
/**
 * Manual entry point: builds a {@code CatchPage} with a fixed date and fixed arguments and
 * runs {@code catchItemPage()} on it.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String pattern = "yyyy-MM-dd HH:mm:ss";
    final Date latestDate = BotTool.String2Date("2003-09-15 07:00:00", pattern);
    new CatchPage(latestDate, "SMTH", "SecondHand", 2).catchItemPage();
    // Earlier commented-out experiments (a single-article fetch through CatchThread.catchItem
    // and a batch post of sample items through postPage) were dead code and are omitted.
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -