⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 model.java

📁 用来为垂直搜索引擎抓取数据的采集系统
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 * *****************************************************
 * Copyright (c) 2005 IIM Lab. All  Rights Reserved.
 * Created by xuehao at 2005-10-12
 * Contact: zxuehao@mail.ustc.edu.cn
 * *****************************************************
 */

package org.indigo.gui;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Observable;
import java.util.StringTokenizer;

import org.apache.commons.httpclient.NameValuePair;
import org.indigo.db.LogEntity;
import org.indigo.log.FileLogger;
import org.indigo.log.FileLoggerNumZero;
import org.indigo.log.FileLoggerParam;
import org.indigo.log.FileLoggerXml;
import org.indigo.pages.CollectedIdPageFactory;
import org.indigo.pages.CollectedIdsPage;
import org.indigo.pages.CollectedLinkedPages;
import org.indigo.pages.CollectedPage;
import org.indigo.pages.IdStrategy;
import org.indigo.pages.JNWAgriStrategy;
import org.indigo.pages.YztAgriStrategy;
import org.indigo.pages.VisitLinkedPages;
import org.indigo.pages.VisitPage;
import org.indigo.parser.IdFilter;
import org.indigo.parser.ItemParser;
import org.indigo.parser.PageParserManager;
import org.indigo.parser.Parser;
import org.indigo.util.MainConfig;
import org.indigo.util.TaskProperties;
import org.indigo.xml.ChildPair;
import org.indigo.xml.FiltPair;
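/**
 * Model of the collector GUI: holds the settings of one collection task,
 * which are read from the property file whose path is passed to the
 * constructor.
 * <p>Extends {@link Observable}.</p>
 *
 * @author wbz
 */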
public class Model extends Observable
{
    private String itsPropertyFile=null;
 
    private IView itsView = null;
    
    private String itsVisitBeginUrl=null;
    private String itsCollectedBeginUrl=null;
    private String itsVisitKey=null;
    private String itsCollectedKey=null;
    private String itsIdStrategy=null;
    private boolean itsDirectReturn=false;
    private String itsEncoding=null;
    private String itsIdFront=null, itsIdBack=null;
    private String itsIdBodyFront=null,itsIdBodyBack=null;
    private String itsInfoBodyFront=null,itsInfoBodyBack=null;
    private String itsLastItem=null;
    private int itsStart,itsEnd,itsInc;
    private ArrayList itsList=null;
    private boolean itsFilterFlag = true;
    private CollectedIdPageFactory idFactory=null;
    private boolean bLastReached=false;
    private boolean bLastItemAvailable=false;
    private boolean itsSplitIdFlag=false;
    private String itsSplitId=null;
    private String lastItemUrl=null;
    private boolean useUrlMark=false;
    private VisitPage itsVisitPage=null;
    private CollectedPage itsCollectedPage=null;
    private CollectedIdsPage itsIdsPage=null;
    private String childFront=null;
    private String childBack=null;
    private boolean existNest=false;
    private boolean IsIncludeJscript=false;
    private PageParserManager itsPageMag=null;
    private ArrayList paramerList=null;
    NameValuePair[] data=null;
    private String keyField=null;
    private String key=null;
    private String keyParam=null;
    private String formUrl=null;
    private boolean IsByLogin=false;
    private static final int itsMaxSize=100;
//    private static final int ItemParser = 0;
    private boolean isIdFilter=false;
 //   private String itsEncoding=null;
    /**
     * Creates a model bound to a task property file.
     *
     * @param file path of the property file; it is only stored here and is
     *             opened later by readProperties()
     */
    public Model( String file )
    {
        this.itsPropertyFile = file;
    }
    /**
     * Loads the task settings from the property file named by
     * {@code itsPropertyFile} into the fields of this model.
     * <p>
     * Boolean settings are true only when the property text equals "true"
     * (case-insensitive unless noted below). The extraction rules
     * ("Front1"/"Back1" ... "Front<i>N</i>"/"Back<i>N</i>", N = "RuleCount")
     * are turned into FiltPair objects and stored in {@code itsList}.
     *
     * @throws NumberFormatException if "StartPageNum", "EndPageNum",
     *         "incPageNum" or "RuleCount" is missing or not an integer, or
     *         if "ParamCount" is present but not an integer
     */
    private void readProperties()
    {
        TaskProperties props = new TaskProperties();
        props.open( itsPropertyFile );

        itsVisitBeginUrl = props.getProperty( "VisitBeginUrl" );
        itsVisitKey = props.getProperty( "VisitKey" );

        String directReturn = props.getProperty( "DirectReturn" );
        itsDirectReturn = directReturn != null
                && directReturn.trim().equalsIgnoreCase( "true" );

        itsStart = Integer.parseInt( props.getProperty( "StartPageNum" ) );
        itsEnd = Integer.parseInt( props.getProperty( "EndPageNum" ) );
        itsInc = Integer.parseInt( props.getProperty( "incPageNum" ) );

        // The next three flags are only ever switched on here, never reset.
        String idsNest = props.getProperty( "IdsNest" );
        if( idsNest != null && idsNest.equalsIgnoreCase( "true" ) )
            existNest = true;

        String jScript = props.getProperty( "JScript" );
        if( jScript != null && jScript.equalsIgnoreCase( "true" ) )
            IsIncludeJscript = true;
        if( IsIncludeJscript )
        {
            keyField = props.getProperty( "KeyField" );
            key = props.getProperty( "Key" );
            keyParam = props.getProperty( "KeyParam" );
            loadFormUrlAndParams( props );
        }

        String byLogin = props.getProperty( "IsByLogin" );
        if( byLogin != null && byLogin.equalsIgnoreCase( "true" ) )
            IsByLogin = true;
        if( IsByLogin )
        {
            // Same "FormUrl"/"ParamN" properties as the JScript case.
            loadFormUrlAndParams( props );
        }

        childFront = props.getProperty( "ChildIdsFront" );
        childBack = props.getProperty( "ChildIdsBack" );

        itsCollectedBeginUrl = props.getProperty( "CollectedBeginUrl" );
        itsCollectedKey = props.getProperty( "CollectedKey" );

        String idStrategy = props.getProperty( "IdStrategy" );
        if( idStrategy != null )
            itsIdStrategy = idStrategy.trim();

        // NOTE(review): the flag stays true only when "Filter" is absent or
        // blank; any other text, including "true", switches it off. Kept
        // as-is -- confirm this is the intended meaning.
        String filter = props.getProperty( "Filter" );
        itsFilterFlag = filter == null || filter.trim().length() == 0;

        itsIdBodyFront = props.getProperty( "IdBodyFront" );
        itsIdBodyBack = props.getProperty( "IdBodyBack" );
        itsInfoBodyFront = props.getProperty( "InfoBodyFront" );
        itsInfoBodyBack = props.getProperty( "InfoBodyBack" );
        itsIdFront = props.getProperty( "IdFront" );
        itsIdBack = props.getProperty( "IdBack" );
        itsEncoding = props.getProperty( "Encode" );

        itsList = new ArrayList();

        int ruleCount = Integer.parseInt( props.getProperty( "RuleCount" ) );
        String front[] = new String[ruleCount];
        String back[] = new String[ruleCount];
        // The same ChildPair object is handed to every child FiltPair below.
        ChildPair cp = new ChildPair();

        for( int i = 0; i < ruleCount; )
        {
            int ruleNo = i + 1;
            front[i] = props.getProperty( "Front" + ruleNo );
            back[i] = props.getProperty( "Back" + ruleNo );
            String isRegex = props.getProperty( "IsRegex" + ruleNo );
            String defaultValue = props.getProperty( "Default" + ruleNo );
            String isVarDef = props.getProperty( "isVarDefault" + ruleNo );
            String varDefName = props.getProperty( "varDefaultName" + ruleNo );

            // A rule without a "BackN" entry is skipped.
            if( back[i] != null )
            {
                FiltPair fp = new FiltPair( front[i], back[i], isRegex, defaultValue );
                fp.setIsVarDefault( isVarDef );
                fp.setVarDefaultName( varDefName );
                // NOTE(review): index-based insert; ArrayList throws
                // IndexOutOfBoundsException when the list holds fewer than
                // i elements, e.g. after an earlier rule was skipped.
                itsList.add( i, fp );
            }
            i++;

            // NOTE(review): i has already been advanced, so this looks up
            // "ChildRule2" after rule 1, ... and "ChildRule<ruleCount+1>"
            // after the last rule (where front[i] is out of bounds);
            // "ChildRule1" is never consulted. The keys below are built by
            // string concatenation, e.g. "Front21" for i == 1. Behaviour is
            // kept exactly as it was -- confirm the intended numbering.
            if( props.getProperty( "ChildRule" + (i + 1) ) != null )
            {
                front[i] = props.getProperty( "Front" + (i + 1) + 1 );
                back[i] = props.getProperty( "Back" + (i + 1) + 1 );
                cp.setItsStart( props.getProperty( "Front" ) );
                cp.setItsEnd( props.getProperty( "Back" ) );

                itsList.add( i, new FiltPair( front[i], back[i], i, cp ) );
            }
        }

        itsLastItem = props.getProperty( "LastItem" );

        String mark = props.getProperty( "UseUrlMark" );
        useUrlMark = mark != null && mark.equalsIgnoreCase( "true" );
        if( useUrlMark )
        {
            // "LastItemUrl" is only read when the URL mark is enabled.
            lastItemUrl = props.getProperty( "LastItemUrl" );
            if( lastItemUrl == null )
                lastItemUrl = "";
        }

        String lastAvailable = props.getProperty( "LastItemAvailable" );
        bLastItemAvailable = lastAvailable != null
                && lastAvailable.equalsIgnoreCase( "true" );

        // Case-sensitive comparison, unlike the other flags.
        String idFilter = props.getProperty( "IdFilter" );
        if( idFilter != null && idFilter.equals( "true" ) )
            isIdFilter = true;
    }

    /**
     * Reads "FormUrl" into {@code formUrl} and the entries "Param1" ..
     * "Param<i>N</i>" (N = "ParamCount", 0 when that property is absent)
     * into {@code data}.
     * <p>
     * Each entry is split at its first '=' into a name/value pair. An entry
     * that is missing or contains no '=' leaves its slot in {@code data}
     * null.
     *
     * @param props the opened task properties to read from
     * @throws NumberFormatException if "ParamCount" is not an integer
     */
    private void loadFormUrlAndParams( TaskProperties props )
    {
        int paramCount = 0;
        String countText = props.getProperty( "ParamCount" );
        if( countText != null )
            paramCount = Integer.parseInt( countText );

        formUrl = props.getProperty( "FormUrl" );

        data = new NameValuePair[paramCount];
        for( int n = 1; n <= paramCount; n++ )
        {
            String param = props.getProperty( "Param" + n );
            if( param == null )
                continue; // missing entry: treated like one without '='

            int eq = param.indexOf( "=" );
            if( eq != -1 )
            {
                data[n - 1] = new NameValuePair( param.substring( 0, eq ),
                                                 param.substring( eq + 1 ) );
            }
        }
    }
    
    private void init()
    {
        itsVisitPage = new VisitPage( itsVisitKey );
        itsVisitPage.setBeginUrl( itsVisitBeginUrl );
        itsVisitPage.setParameters( itsStart, itsEnd, itsInc );
        itsVisitPage.setDirectReturn( itsDirectReturn );
        itsVisitPage.setIsIncludeJS(IsIncludeJscript);
        itsVisitPage.setJSValuePair(data);
        itsVisitPage.setKeyField(keyField);
        itsCollectedPage = new CollectedPage( itsCollectedKey );
        itsCollectedPage.setBeginUrl( itsCollectedBeginUrl );
        
        
        /*
        if( itsSplitIdFlag )
        {
            itsCollectedPage.setIdStrategy( new YztAgriStrategy() );
        }
        */
        /*
        if( itsSplitId.equalsIgnoreCase("jnw") )
        {
            itsCollectedPage.setIdStrategy( new JNWAgriStrategy() );
        }else
        if( itsSplitId.equalsIgnoreCase("yzt") )
        {
            itsCollectedPage.setIdStrategy( new YztAgriStrategy() );
        }
        */
        if( itsIdStrategy==null || itsIdStrategy.equalsIgnoreCase("") )
        {
        	
        }else
        {
        	try 
			{
				Class c = Class.forName( itsIdStrategy );
				IdStrategy s = (IdStrategy) c.newInstance();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -