📄 crawltest.java
字号:
package cn.casia.ailab.ldy.cmt;
import java.io.File;
import java.io.IOException;
/**
 * Command-line driver that chains the crawl steps exposed by {@code CrawlComment} and
 * {@code UrlCommentExtractor} (both defined elsewhere in the project).
 *
 * <p>The calls are made in this order: {@code homePageDownload}, {@code homePageExtract},
 * {@code productPageDown} for every entry of the "品牌URL2" folder, {@code urlExtract} and
 * {@code commentCrawl} for every entry of the "品牌主页3" folder, and finally
 * {@code commentExtract} for every entry of the "评论网页5" folder.
 */
public class CrawlTest {

    /** Seed file used when no path is supplied on the command line. */
    private static final String DEFAULT_SEED_FILE = "D:\\863项目\\crawl\\pchome.txt";

    /** Suffix that terminates the product name inside a list-file name. */
    private static final String LIST_FILE_SUFFIX = ".txt";

    /**
     * Runs the whole pipeline.
     *
     * @param args optional; {@code args[0]} overrides the seed file path, otherwise
     *             {@link #DEFAULT_SEED_FILE} is used
     * @throws IOException          propagated from the crawl helpers
     * @throws InterruptedException propagated from the crawl helpers
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        String pchome = (args != null && args.length > 0) ? args[0] : DEFAULT_SEED_FILE;

        // Working folders: "pchome1" and "品牌URL2" (brand URLs).
        String pchomeFolder = CrawlComment.dirCreate(pchome, "pchome1");
        String pcExtractor = CrawlComment.dirCreate(pchome, "品牌URL2");

        CrawlComment crawlC = new CrawlComment();
        crawlC.homePageDownload(pchome, pchomeFolder);
        crawlC.homePageExtract(pchomeFolder, pcExtractor);

        // "品牌主页3" (brand home pages): one output per entry found in the brand-URL folder.
        // Paths are built by plain concatenation, so the strings returned by dirCreate are
        // presumably terminated by a separator - TODO confirm against CrawlComment.
        String newDir = CrawlComment.dirCreate(pchomeFolder, "品牌主页3");
        for (String listFile : orEmpty(CrawlComment.dirReader(pcExtractor))) {
            crawlC.productPageDown(pcExtractor + listFile, newDir + listFile);
            System.out.println("抓取" + productNameOf(listFile) + " 的产品主页 ok");
        }

        // "评论URL4" (comment URLs), "评论网页5" (comment pages), "评论内容6" (comment content).
        String urlDir = CrawlComment.dirCreate(newDir, "评论URL4");
        String urlContentDir = CrawlComment.dirCreate(newDir, "评论网页5");
        String commentConDir = CrawlComment.dirCreate(newDir, "评论内容6");

        UrlCommentExtractor uCE = new UrlCommentExtractor();
        // NOTE(review): newDir is listed after the three dirCreate calls above; if dirCreate
        // places the new folders inside newDir they would show up in this listing - confirm.
        for (String pageFile : orEmpty(CrawlComment.dirReader(newDir))) {
            String urlListFile = urlDir + pageFile;
            uCE.urlExtract(newDir + pageFile, urlListFile);
            uCE.commentCrawl(urlListFile, urlContentDir);
        }

        for (String commentFolder : orEmpty(CrawlComment.dirReader(urlContentDir))) {
            // The "\\" separator is kept as in the original so the paths handed to
            // commentExtract are unchanged.
            String inputDir = urlContentDir + commentFolder + "\\";
            String outputDir = commentConDir + commentFolder + "\\";

            // mkdirs() also creates missing parents; its result is checked instead of
            // being silently dropped.
            File subfolder = new File(outputDir);
            if (!subfolder.isDirectory() && !subfolder.mkdirs()) {
                System.err.println("Could not create directory: " + outputDir);
            }
            // Still invoked after a failed mkdirs so the helper decides how to react.
            uCE.commentExtract(inputDir, outputDir);
        }
    }

    /**
     * Returns the text between the first {@code '_'} and the first {@code ".txt"} of a
     * file name. The value is only used for progress output, so a name that lacks the
     * suffix (or has it before the underscore) yields everything after the underscore
     * instead of raising {@code StringIndexOutOfBoundsException}.
     *
     * @param fileName entry name taken from a directory listing
     * @return the extracted product name, possibly empty
     */
    private static String productNameOf(String fileName) {
        int start = fileName.indexOf("_") + 1; // 0 when there is no underscore
        int end = fileName.indexOf(LIST_FILE_SUFFIX);
        if (end < start) {
            end = fileName.length();
        }
        return fileName.substring(start, end);
    }

    /**
     * Maps a {@code null} listing to an empty array so the loops above can iterate safely.
     *
     * @param names listing returned by {@code CrawlComment.dirReader}, possibly {@code null}
     * @return {@code names}, or an empty array when {@code names} is {@code null}
     */
    private static String[] orEmpty(String[] names) {
        return names == null ? new String[0] : names;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -