📄 testpageparsermanager1.java
字号:
/*
* *****************************************************
* Copyright (c) 2005 IIM Lab. All Rights Reserved.
* Created by xuehao at 2005-10-12
* Contact: zxuehao@mail.ustc.edu.cn
* *****************************************************
*/
package org.indigo.tests.parser;
import java.util.ArrayList;
import junit.framework.TestCase;
import org.indigo.pages.CollectedIdsPage;
import org.indigo.pages.CollectedPage;
import org.indigo.pages.VisitPage;
import org.indigo.parser.PageParserManager;
import org.indigo.parser.Parser;
/**
 * Manual smoke test for {@link PageParserManager}.
 *
 * <p>The test walks a sequence of listing pages through {@link VisitPage},
 * asks {@link CollectedIdsPage} for the ids found on each listing page, turns
 * every id into a URL via {@link CollectedPage}, and prints each item that
 * {@link PageParserManager} extracts from that URL to standard output.
 *
 * <p>Apart from checking that an id list is returned, the test asserts
 * nothing about the extracted data; the output is meant to be inspected by
 * hand. NOTE(review): the collaborators presumably fetch the configured
 * http URL over the network - confirm before running this unattended.
 */
public class TestPageParserManager1 extends TestCase
{
    /** Listing URL used as the begin URL of both the visit page and the collected page. */
    private static final String BEGIN_URL =
        "http://www.ahnw.gov.cn/scxx/schq/?datetime=&page=1&zl=&diqu=&chanpin=&dl=&NewDay=0";

    /** End marker shared by every registered field. */
    private static final String FIELD_END = "</td>";

    /**
     * Start markers of the fields to extract, in registration order.
     * The markers are passed verbatim to {@code addField}, so whitespace
     * inside them (including the trailing space of the first four) is kept
     * exactly as in the original test.
     */
    private static final String[] FIELD_STARTS =
    {
        "<td class=\"z\" width=\"24%\" height=20 style=\"border-right:1 solid #FFFFFF;border-bottom: 1 solid #FFFFFF\"> ",
        "<td width=\"11%\" class=\"z\" style=\"border-right:1 solid #FFFFFF;border-bottom: 1 solid #FFFFFF\"> ",
        "<td width=\"12%\" class=\"z\" style=\"border-right:1 solid #FFFFFF;border-bottom: 1 solid #FFFFFF\"> ",
        "<td width=\"45%\" class=\"z\" style=\"border-right:1 solid #FFFFFF;border-bottom: 1 solid #FFFFFF\"> ",
        "<td width=\"8%\" class=\"z\" align=\"center\" style=\"border-right:1 solid #FFFFFF;border-bottom: 1 solid #FFFFFF\" nowrap>"
    };

    /**
     * Visits every listing page, resolves the ids on it to URLs and prints
     * the items parsed from each of those URLs.
     */
    public void testPageParserManager1()
    {
        VisitPage vPage = new VisitPage("page");
        vPage.setBeginUrl(BEGIN_URL);
        // NOTE(review): meaning of the three arguments is not visible here;
        // presumably a first/last/step page range - confirm against VisitPage.
        vPage.setParameters(1, 3, 1);

        CollectedPage cPage = new CollectedPage("page");
        cPage.setBeginUrl(BEGIN_URL);

        CollectedIdsPage idsPage = new CollectedIdsPage();
        idsPage.setVisitPage(vPage);

        Parser parser = new Parser();
        PageParserManager pageMag = new PageParserManager(true);
        pageMag.setParser(parser);
        for (int f = 0; f < FIELD_STARTS.length; f++)
        {
            pageMag.addField(FIELD_STARTS[f], FIELD_END);
        }

        // The visit page drives the outer loop; it ends when no further link is returned.
        String listUrl = vPage.getCurrentLink();
        while (listUrl != null)
        {
            idsPage.setUrl(listUrl);
            ArrayList ids = idsPage.getIds();
            // Fail with a readable message instead of a bare NullPointerException.
            assertNotNull("no id list returned for " + listUrl, ids);

            for (int i = 0; i < ids.size(); i++)
            {
                String id = (String) ids.get(i);
                String collectedUrl = cPage.getCollectedUrl(id);
                System.out.println(collectedUrl);
                pageMag.setCollectedUrl(collectedUrl);
                printAllItems(pageMag);
            }
            listUrl = vPage.getNextVisitLink();
        }
        // Message text is unchanged from the original test, although it names a different test.
        System.out.println( "TestCollectedPage1 over." );
    }

    /**
     * Opens the manager on its current collected URL, prints every item it
     * yields until {@code getAItem()} returns {@code null}, then closes it.
     * The close happens in a {@code finally} block so the manager is
     * released even if reading an item throws.
     *
     * @param pageMag manager whose collected URL has already been set
     */
    private void printAllItems(PageParserManager pageMag)
    {
        pageMag.open();
        try
        {
            String aItem = pageMag.getAItem();
            while (aItem != null)
            {
                System.out.println(aItem);
                aItem = pageMag.getAItem();
            }
        }
        finally
        {
            pageMag.close();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -