📄 model.java
字号:
itsCollectedPage.setIdStrategy( s );
} catch (ClassNotFoundException e)
{
e.printStackTrace();
}catch (InstantiationException e1)
{
e1.printStackTrace();
} catch (IllegalAccessException e1)
{
e1.printStackTrace();
}
}
idFactory=new CollectedIdPageFactory();
// if( itsIdFront==null || itsIdBack==null || itsIdFront.equals("") || itsIdBack.equals("") )
// {
// itsIdsPage = new CollectedIdsPage();
// }else
// {
// itsIdsPage = new CollectedIdsPage( itsIdFront, itsIdBack );
// }
itsIdsPage=idFactory.getCollectedIdsPage(itsIdFront, itsIdBack, itsIdBodyFront, itsIdBodyBack);
itsIdsPage.setVisitPage( itsVisitPage );
itsIdsPage.setIsByPost(IsIncludeJscript);
itsIdsPage.setItsEncoding(itsEncoding);
Parser parser = new Parser();
parser.setItsEncoding(itsEncoding);
itsPageMag = new PageParserManager( itsFilterFlag );
itsPageMag.setParser(parser);
itsPageMag.setIsByPost(IsIncludeJscript);
itsPageMag.setIsByLogin(IsByLogin);
itsPageMag.setPairList( itsList );
}
/**
 * Stores the view reference used by this model.
 *
 * @param view the view to associate with this object; stored as-is, no validation
 */
public void setView( IView view )
{
    this.itsView = view;
}
/**
 * Runs one full collection pass.
 *
 * <p>After {@code readProperties()} and {@code init()}, the method walks the list pages
 * supplied by {@code itsVisitPage}. For every list page it obtains the ids from
 * {@code itsIdsPage}, turns each id into a detail URL, opens that URL through
 * {@code itsPageMag} and pulls items out of it one by one. Each accepted item is
 * published with {@code setChanged()} / {@code notifyObservers(aItem)}.
 *
 * <p>The crawl stops when any of the following happens:
 * <ul>
 *   <li>there is no further list page ({@code getNextVisitLink()} returns {@code null});</li>
 *   <li>the item (or, with {@code useUrlMark}, the URL) recorded by the previous run is
 *       met again ({@code bLastReached});</li>
 *   <li>the first item / first URL of this run shows up a second time;</li>
 *   <li>the number of detail pages that failed to open reaches the "ValveDate" threshold
 *       (default 5).</li>
 * </ul>
 *
 * <p>Afterwards the "LastItem" / "LastItemUrl" properties are updated (only when
 * {@code bLastItemAvailable}), a summary line is written to {@code FileLoggerParam}
 * (and to {@code FileLoggerNumZero} when nothing was collected), and a {@code LogEntity}
 * is executed unless the "IsTest" property is set to something other than "false".
 *
 * @throws NumberFormatException if the "ValveDate" property is set but is not an integer
 */
public void doStrategy()
{
    boolean bExit = false;
    boolean bNotGet = false;
    String url, aItem, tmpLastItem = null, firstItem = null, tmpUrl = null, firstUrl = null;
    readProperties();
    init();

    // Give up once this many detail pages could not be opened.
    int valveData = 5;
    String valveStr = MainConfig.getInstance().getProperty("ValveDate");
    if( valveStr!=null && !valveStr.equals("") )
    {
        valveData = Integer.parseInt( valveStr );
    }

    int notGetCount = 0;
    int count = 0, j;
    url = itsVisitPage.getCurrentLink();
    ArrayList ids = null;
    boolean bGetFlag = false;
    while( url!=null && !bLastReached && !bExit )
    {
        itsIdsPage.setUrl( url );
        ids = itsIdsPage.getIds();
        if( existNest )
        {
            // Nested ids: convert the raw ids through the collected page (added by wbz).
            ids = itsCollectedPage.getIDs( ids, childFront, childBack );
        }
        for( int i=0; i<ids.size() && !bLastReached && !bExit; i++ )
        {
            String id = (String) ids.get(i);
            if( id==null || id.equals("") )
            {
                continue;
            }
            itsCollectedPage.setIsByPost( IsIncludeJscript );
            if( isIdFilter )
            {
                itsPageMag.clearTable();
                id = IdFilter.Filter( id, itsPageMag );
            }
            // From here on 'url' is the detail page for this id, not the list page.
            url = itsCollectedPage.getCollectedUrl( id );
            if( IsIncludeJscript )
            {
                this.ModifyPair( keyParam, url );
                itsPageMag.setValuePair( data );
                itsPageMag.setCollectedUrl( formUrl );
            }
            else if( IsByLogin )
            {
                itsPageMag.setValuePair( data );
                itsPageMag.setCollectedFromUrl( formUrl );
                itsPageMag.setCollectedUrl( url );
            }
            else
            {
                itsPageMag.setCollectedUrl( url );
            }

            bGetFlag = itsPageMag.open();
            if( !bGetFlag )
            {
                notGetCount++;
                if( notGetCount>=valveData )
                {
                    bExit = true;
                    bNotGet = true;
                }
                continue;
            }

            do
            {
                // Restrict parsing to the configured body region when both markers are set
                // (added by wbz).
                if( itsInfoBodyFront!=null && itsInfoBodyBack!=null
                        && !itsInfoBodyFront.equals("") && !itsInfoBodyBack.equals("") )
                {
                    itsPageMag.setItsPageStr( itsInfoBodyFront, itsInfoBodyBack );
                }
                aItem = itsPageMag.getAItem();
                if( aItem!=null )
                {
                    // Everything before the last GAP_TOKEN identifies the item.
                    // NOTE(review): assumes every item contains GAP_TOKEN; otherwise j is -1
                    // and substring throws - confirm against ItemParser.
                    j = aItem.lastIndexOf( ItemParser.GAP_TOKEN );
                    String itemKey = aItem.substring( 0, j );

                    if( useUrlMark )
                    {
                        // Mark progress by URL instead of item text (added by wbz).
                        if( firstUrl!=null )
                        {
                            if( firstUrl.equals(url) )
                            {
                                bExit = true;
                                break;
                            }
                        }
                        else
                        {
                            firstUrl = url;
                        }
                        // Null-safe: there may be no URL recorded by a previous run.
                        if( lastItemUrl!=null && lastItemUrl.equals(url) )
                        {
                            bLastReached = true;
                            break;
                        }
                        if( tmpUrl==null )
                        {
                            tmpUrl = url;
                        }
                    }

                    if( firstItem!=null )
                    {
                        // Seeing the first item of this run again means we wrapped around.
                        if( firstItem.equals(itemKey) )
                        {
                            bExit = true;
                            break;
                        }
                    }
                    else
                    {
                        firstItem = itemKey;
                    }

                    itemKey = getAShorterItem( itemKey );
                    if( bLastItemAvailable && itemKey.equals( itsLastItem ) )
                    {
                        bLastReached = true;
                        break;
                    }
                    if( tmpLastItem==null )
                    {
                        // The first accepted item of this run becomes the next run's marker.
                        tmpLastItem = itemKey;
                    }
                    count++;
                    setChanged();
                    notifyObservers( aItem );
                }
            } while( aItem!=null );
            itsPageMag.close();
        }
        url = itsVisitPage.getNextVisitLink();
    }

    // Persist the markers for the next run (URL marker added by wbz).
    if( bLastItemAvailable )
    {
        TaskProperties props = new TaskProperties( itsPropertyFile );
        if( tmpLastItem!=null )
        {
            itsLastItem = tmpLastItem;
            props.changeProperty( "LastItem", itsLastItem );
        }
        if( tmpUrl!=null )
        {
            props.changeProperty( "LastItemUrl", tmpUrl );
        }
    }

    String str = itsPropertyFile + ": Get Total Items Count=" + count;
    LogEntity log = new LogEntity();
    String Total = count+"";
    log.setPATH( itsPropertyFile );
    log.setRePath( itsPropertyFile );
    log.setTOTAL( Total );

    // Classify the outcome once and reuse it for both loggers, so the general log and the
    // zero-count log can never disagree.
    String whyCode = diagnoseOutcome( ids, bNotGet, bGetFlag, firstItem );
    FileLoggerParam.getInstance().info( str+" "+whyCode );

    String zeroCode = null;
    if( 0==count )
    {
        zeroCode = whyCode;
        FileLoggerNumZero.getInstance().info( str+" "+zeroCode );
    }

    log.setWHYCODE( whyCode );
    String totalStr = str+" "+whyCode;
    if( zeroCode!=null )
    {
        log.setZeroCode( zeroCode );
        totalStr = str+" "+zeroCode;
    }
    log.setWHYSTRING( totalStr );

    String isTest = MainConfig.getInstance().getProperty("IsTest");
    if( isTest==null || isTest.equalsIgnoreCase("false") )
    {
        log.Execute();
    }
}

/**
 * Picks the message that explains how a collection pass ended.
 *
 * <p>Side effect: when the id page could not be fetched, {@code CollectedIdsPage.isOpen}
 * is set to {@code false}, as the original inline code did.
 *
 * @param ids       ids of the last list page visited; {@code null} (the loop never ran)
 *                  is treated like an empty list
 * @param bNotGet   {@code true} if the failed-open threshold was reached
 * @param bGetFlag  result of the last {@code itsPageMag.open()} call
 * @param firstItem first item seen in this pass, or {@code null} if none was parsed
 * @return the reason text to append to the summary log line
 */
private String diagnoseOutcome( ArrayList ids, boolean bNotGet, boolean bGetFlag, String firstItem )
{
    if( bNotGet || !CollectedIdsPage.isOpen )
    {
        CollectedIdsPage.isOpen = false;
        return "获取ids时网络链接失败!";
    }
    if( ids==null || ids.size()==0 )
    {
        return "获取ids字段配置错误!";
    }
    if( bLastReached )
    {
        return "已经到达上次采集数据!";
    }
    if( firstItem==null )
    {
        if( bGetFlag )
        {
            return " 根据模板无法采集到数据!在第"+itsPageMag.getErrorMark()+
                    "字段处: "+itsPageMag.getErrorField();
        }
        return "网络连接失败!";
    }
    // Items were collected but the previous run's marker was never met.
    return " 模板中设置的结束页面小于最后一条所在的页面!";
}
/**
 * Replaces, in place, every entry of {@code data} whose name equals {@code key}
 * with a new pair carrying {@code value}.
 *
 * @param key   name of the pair(s) to replace
 * @param value new value for the matching pair(s)
 * @return the same {@code data} array, after modification
 */
public NameValuePair[] ModifyPair(String key,String value)
{
    for( int idx=0; idx<data.length; idx++ )
    {
        NameValuePair current = data[idx];
        if( !current.getName().equals(key) )
        {
            continue;
        }
        data[idx] = new NameValuePair( key, value );
    }
    return data;
}
/**
 * Builds a shortened form of an item.
 *
 * <p>The item is split with a {@link StringTokenizer} that uses
 * {@code ItemParser.GAP_TOKEN} as its delimiter set, each resulting field is cut to at
 * most {@code itsMaxSize} characters, and the fields are joined again with
 * {@code ItemParser.GAP_TOKEN} between them (no trailing separator).
 *
 * @param item the item text to shorten
 * @return the shortened item, or an empty string when {@code item} yields no fields
 */
public String getAShorterItem( String item )
{
    StringBuilder shortened = new StringBuilder();
    StringTokenizer tok = new StringTokenizer( item, ItemParser.GAP_TOKEN );
    boolean first = true;
    while( tok.hasMoreTokens() )
    {
        String field = tok.nextToken();
        if( field.length()>itsMaxSize )
        {
            field = field.substring( 0, itsMaxSize );
        }
        // Put the separator between fields rather than appending it and trimming one
        // character afterwards, which is only correct for a one-character GAP_TOKEN.
        if( !first )
        {
            shortened.append( ItemParser.GAP_TOKEN );
        }
        shortened.append( field );
        first = false;
    }
    return shortened.toString();
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -