📄 JRip.java
    // If there are already antecedents existing
    for(int j=0; j < m_Antds.size(); j++){
      Antd antdj = (Antd)m_Antds.elementAt(j);
      if(!antdj.getAttr().isNumeric()){
        used[antdj.getAttr().index()] = true;
        numUnused--;
      }
    }

    double maxInfoGain;
    while (Utils.gr(growData.numInstances(), 0.0) &&
           (numUnused > 0) &&
           Utils.sm(defAcRt, 1.0)){

      // We require that infoGain be positive
      /*if(numAntds == originalSize)
        maxInfoGain = 0.0; // At least one condition allowed
      else
        maxInfoGain = Utils.eq(defAcRt, 1.0) ?
          defAccu/(double)numAntds : 0.0; */
      maxInfoGain = 0.0;

      /* Build a list of antecedents */
      Antd oneAntd = null;
      Instances coverData = null;
      Enumeration enumAttr = growData.enumerateAttributes();
      int index = -1;

      /* Build one condition based on all attributes not used yet */
      while (enumAttr.hasMoreElements()){
        Attribute att = (Attribute)(enumAttr.nextElement());
        index++;

        if(m_Debug)
          System.err.println("\nOne condition: size = "
                             + growData.sumOfWeights());

        Antd antd = null;
        if(att.isNumeric())
          antd = new NumericAntd(att);
        else
          antd = new NominalAntd(att);

        if(!used[index]){
          /* Compute the best information gain for each attribute,
             it's stored in the antecedent formed by this attribute.
             This procedure returns the data covered by the antecedent */
          Instances coveredData = computeInfoGain(growData, defAcRt, antd);
          if(coveredData != null){
            double infoGain = antd.getMaxInfoGain();
            if(m_Debug)
              System.err.println("Test of '" + antd.toString()
                                 + "': infoGain = " + infoGain
                                 + " | Accuracy = " + antd.getAccuRate()
                                 + "=" + antd.getAccu() + "/" + antd.getCover()
                                 + " def. accuracy: " + defAcRt);

            if(infoGain > maxInfoGain){
              oneAntd = antd;
              coverData = coveredData;
              maxInfoGain = infoGain;
            }
          }
        }
      }

      if(oneAntd == null) break;                      // Cannot find antds
      if(Utils.sm(oneAntd.getAccu(), m_MinNo)) break; // Too low coverage

      // Numeric attributes can be used more than once
      if(!oneAntd.getAttr().isNumeric()){
        used[oneAntd.getAttr().index()] = true;
        numUnused--;
      }

      m_Antds.addElement(oneAntd);
      growData = coverData; // Grow data size is shrinking
      defAcRt = oneAntd.getAccuRate();
    }
  }

  /**
   * Compute the best information gain for the specified antecedent
   *
   * @param instances the data based on which the infoGain is computed
   * @param defAcRt the default accuracy rate of the data
   * @param antd the specific antecedent
   * @return the data covered by the antecedent
   */
  private Instances computeInfoGain(Instances instances, double defAcRt,
                                    Antd antd){
    Instances data = instances;

    /* Split the data into bags.  The information gain of each bag
       is also calculated in this procedure */
    Instances[] splitData = antd.splitData(data, defAcRt, m_Consequent);

    /* Get the bag of data to be used for the next antecedents */
    if(splitData != null)
      return splitData[(int)antd.getAttrValue()];
    else
      return null;
  }
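  // For reference: the NumericAntd/NominalAntd implementations rank
  // candidate conditions by a FOIL-style information gain.  A minimal
  // sketch of that criterion, assuming p and n are the positive/negative
  // weights the candidate covers and defAcRt is the accuracy rate before
  // adding it (hypothetical helper for illustration only, not part of
  // Weka's Antd API):
  private static double foilInfoGain(double p, double n, double defAcRt){
    if(p <= 0) return 0.0;                        // No positives covered, no gain
    double accuRate = (p + 1.0) / (p + n + 1.0);  // Smoothed accuracy of the candidate
    // Gain: positives covered times the improvement in log-accuracy
    return p * (Utils.log2(accuRate) - Utils.log2(defAcRt));
  }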
  /**
   * Prune all the possible final sequences of the rule using the
   * pruning data.  The measure used to prune the rule is based on
   * the flag given.
   *
   * @param pruneData the pruning data used to prune the rule
   * @param useWhole flag to indicate whether to use the error rate of
   * the whole pruning data instead of the data covered
   */
  public void prune(Instances pruneData, boolean useWhole){
    Instances data = pruneData;

    double total = data.sumOfWeights();
    if(!Utils.gr(total, 0.0)) return;

    /* The default accurate # and rate on the pruning data */
    double defAccu = computeDefAccu(data);

    if(m_Debug)
      System.err.println("Pruning with " + defAccu
                         + " positive data out of " + total + " instances");

    int size = m_Antds.size();
    if(size == 0) return; // Default rule before pruning

    double[] worthRt = new double[size];
    double[] coverage = new double[size];
    double[] worthValue = new double[size];
    for(int w=0; w<size; w++){
      worthRt[w] = coverage[w] = worthValue[w] = 0.0;
    }

    /* Calculate the accuracy parameters for all the antecedents in
       this rule */
    double tn = 0.0; // True negatives if useWhole
    for(int x=0; x<size; x++){
      Antd antd = (Antd)m_Antds.elementAt(x);
      Instances newData = data;
      data = new Instances(newData, 0); // Make data empty

      for(int y=0; y<newData.numInstances(); y++){
        Instance ins = newData.instance(y);

        if(antd.covers(ins)){ // Covered by this antecedent
          coverage[x] += ins.weight();
          data.add(ins); // Add to data for further pruning
          if((int)ins.classValue() == (int)m_Consequent) // Accurate prediction
            worthValue[x] += ins.weight();
        }
        else if(useWhole){ // Not covered
          if((int)ins.classValue() != (int)m_Consequent)
            tn += ins.weight();
        }
      }

      if(useWhole){
        worthValue[x] += tn;
        worthRt[x] = worthValue[x] / total;
      }
      else // Note: if the coverage is 0, the accuracy is 0.5
        worthRt[x] = (worthValue[x] + 1.0) / (coverage[x] + 2.0);
    }

    double maxValue = (defAccu + 1.0) / (total + 2.0);
    int maxIndex = -1;
    for(int i=0; i<worthValue.length; i++){
      if(m_Debug){
        double denom = useWhole ? total : coverage[i];
        System.err.println(i + "(useAccuracy? " + !useWhole + "): "
                           + worthRt[i] + "=" + worthValue[i] + "/" + denom);
      }
      if(worthRt[i] > maxValue){ // On ties, prefer the shorter rule
        maxValue = worthRt[i];
        maxIndex = i;
      }
    }

    /* Prune the antecedents according to the accuracy parameters */
    for(int z=size-1; z>maxIndex; z--)
      m_Antds.removeElementAt(z);
  }

  /**
   * Prints this rule
   *
   * @param classAttr the class attribute in the data
   * @return a textual description of this rule
   */
  public String toString(Attribute classAttr) {
    StringBuffer text = new StringBuffer();
    if(m_Antds.size() > 0){
      for(int j=0; j < (m_Antds.size()-1); j++)
        text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()
                    + ") and ");
      text.append("(" + ((Antd)(m_Antds.lastElement())).toString() + ")");
    }
    text.append(" => " + classAttr.name()
                + "=" + classAttr.value((int)m_Consequent));

    return text.toString();
  }
}
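// For reference: prune() above keeps the antecedent prefix with the highest
// "worth".  A minimal sketch of the two worth measures it computes, assuming
// tp/tn are true-positive/true-negative weights, cover is the covered weight
// and total the total pruning weight (hypothetical helper for illustration
// only, not a Weka method):
private static double pruneWorth(double tp, double tn, double cover,
                                 double total, boolean useWhole){
  if(useWhole)
    return (tp + tn) / total;           // Accuracy over the whole pruning set
  else
    return (tp + 1.0) / (cover + 2.0);  // Laplace-smoothed precision on covered data
}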
/**
 * Builds Ripper in the order of class frequencies.  For each class
 * it's built in two stages: building and optimization
 *
 * @param instances the training data
 * @exception Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  if(instances.numInstances() == 0)
    throw new Exception(" No instances with a class value!");

  if(instances.checkForStringAttributes())
    throw new UnsupportedAttributeTypeException(" Cannot handle string attributes!");

  if(!instances.classAttribute().isNominal())
    throw new UnsupportedClassTypeException(" Only nominal class, please.");

  m_Random = new Random(m_Seed);
  m_Total = RuleStats.numAllConditions(instances);
  if(m_Debug)
    System.err.println("Number of all possible conditions = " + m_Total);

  Instances data = null;
  m_Filter = new ClassOrder();

  // Something to make the class order different each time in
  // cross-validations
  Instance inst = instances.instance(
      (int)(m_Random.nextDouble() * (double)instances.numInstances()));
  ((ClassOrder)m_Filter).setSeed((long)inst.toString().hashCode());
  ((ClassOrder)m_Filter).setClassOrder(ClassOrder.FREQ_ASCEND);
  m_Filter.setInputFormat(instances);
  data = Filter.useFilter(instances, m_Filter);

  if(data == null)
    throw new Exception(" Unable to randomize the class orders.");

  data.deleteWithMissingClass();
  if(data.numInstances() == 0)
    throw new Exception(" No instances with a class value!");

  if(data.numInstances() < m_Folds)
    throw new Exception(" Not enough data for REP.");

  m_Class = data.classAttribute();
  m_Ruleset = new FastVector();
  m_RulesetStats = new FastVector();
  m_Distributions = new FastVector();

  // Sort by class frequency
  double[] orderedClasses = ((ClassOrder)m_Filter).getClassCounts();
  if(m_Debug){
    System.err.println("Sorted classes:");
    for(int x=0; x < m_Class.numValues(); x++)
      System.err.println(x + ": " + m_Class.value(x) + " has "
                         + orderedClasses[x] + " instances.");
  }

  // Iterate from the least prevalent class to the most frequent one
oneClass:
  for(int y=0; y < data.numClasses()-1; y++){ // For each class

    double classIndex = (double)y;
    if(m_Debug){
      int ci = (int)classIndex;
      System.err.println("\n\nClass " + m_Class.value(ci) + "(" + ci + "): "
                         + orderedClasses[y] + " instances\n"
                         + "=====================================\n");
    }

    if(Utils.eq(orderedClasses[y], 0.0)) // No data for this class
      continue oneClass;

    // The expected FP/err is the proportion of the class
    double all = 0;
    for(int i=y; i < orderedClasses.length; i++)
      all += orderedClasses[i];
    double expFPRate = orderedClasses[y] / all;

    double classYWeights = 0, totalWeights = 0;
    for(int j=0; j < data.numInstances(); j++){
      Instance datum = data.instance(j);
      totalWeights += datum.weight();
      if((int)datum.classValue() == y){
        classYWeights += datum.weight();
      }
    }

    // DL of the default rule: no theory DL, only data DL
    double defDL;
    if(classYWeights > 0)
      defDL = RuleStats.dataDL(expFPRate, 0.0, totalWeights,
                               0.0, classYWeights);
    else
      continue oneClass; // Subsumed by previous rules

    if(Double.isNaN(defDL) || Double.isInfinite(defDL))
      throw new Exception("Should never happen: defDL NaN or infinite!");
    if(m_Debug)
      System.err.println("The default DL = " + defDL);

    data = rulesetForOneClass(expFPRate, data, classIndex, defDL);
  }

  // Set the default rule
  RipperRule defRule = new RipperRule();
  defRule.setConsequent((double)(data.numClasses()-1));
  m_Ruleset.addElement(defRule);

  RuleStats defRuleStat = new RuleStats();
  defRuleStat.setData(data);
  defRuleStat.setNumAllConds(m_Total);
  defRuleStat.addAndUpdate(defRule);
  m_RulesetStats.addElement(defRuleStat);
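  // The loop below normalizes each rule's class distribution and maps it
  // from the frequency-sorted class order back to the original class
  // indices, so that distributionForInstance() reports probabilities
  // against the original class values.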
  for(int z=0; z < m_RulesetStats.size(); z++){
    RuleStats oneClass = (RuleStats)m_RulesetStats.elementAt(z);
    for(int xyz=0; xyz < oneClass.getRulesetSize(); xyz++){
      double[] classDist = oneClass.getDistributions(xyz);
      if(classDist != null){ // Null check must precede normalization
        Utils.normalize(classDist);
        m_Distributions.addElement(
            ((ClassOrder)m_Filter).distributionsByOriginalIndex(classDist));
      }
    }
  }
}

/**
 * Classify the test instance with the rule learner and provide
 * the class distributions
 *
 * @param datum the instance to be classified
 * @return the distribution
 */
public double[] distributionForInstance(Instance datum){
  try{
    for(int i=0; i < m_Ruleset.size(); i++){
      RipperRule rule = (RipperRule)m_Ruleset.elementAt(i);
      if(rule.covers(datum))
        return (double[])m_Distributions.elementAt(i);
    }
  }catch(Exception e){
    System.err.println(e.getMessage());
    e.printStackTrace();
  }

  System.err.println("Should never happen!");
  return new double[datum.classAttribute().numValues()];
}

/**
 * Build a ruleset for the given class according to the given data
 *
 * @param expFPRate the expected FP/(FP+FN) used in DL calculation
 * @param data the given data
 * @param classIndex the given class index
 * @param defDL the default DL in the data
 * @return the data not covered by the ruleset built
 * @exception Exception if the ruleset can't be built properly
 */
protected Instances rulesetForOneClass(double expFPRate,
                                       Instances data,
                                       double classIndex,
                                       double defDL)
  throws Exception {

  Instances newData = data, growData, pruneData;
  boolean stop = false;
  FastVector ruleset = new FastVector();

  double dl = defDL, minDL = defDL;
  RuleStats rstats = null;
  double[] rst;

  // Check whether the data have positive examples
  boolean defHasPositive = true; // No longer used
  boolean hasPositive = defHasPositive;

  /********************** Building stage ***********************/
  if(m_Debug)
    System.err.println("\n*** Building stage ***");

  while((!stop) && hasPositive){ // Generate new rules until the
                                 // stopping criteria are met
    RipperRule oneRule;
    if(m_UsePruning){
      /* Split the data into Grow and Prune */

      // We should have stratified the data, but ripper seems
      // to have a bug that makes it not do so.  In order
      // to simulate it more precisely, we do the same thing.
      //newData.randomize(m_Random);
      newData = RuleStats.stratify(newData, m_Folds, m_Random);
      Instances[] part = RuleStats.partition(newData, m_Folds);
      growData = part[0];
      pruneData = part[1];
      //growData = newData.trainCV(m_Folds, m_Folds-1);
      //pruneData = newData.testCV(m_Folds, m_Folds-1);
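      // Note: RuleStats.partition() above gives roughly (m_Folds-1)/m_Folds
      // of the data to growing and the remaining 1/m_Folds fold to pruning.
      //
      // A minimal usage sketch of the finished classifier, assuming "train"
      // is a loaded Instances object with its class index set (illustration
      // only):
      //
      //   JRip ripper = new JRip();
      //   ripper.buildClassifier(train);   // building + optimization stages
      //   double[] dist = ripper.distributionForInstance(train.instance(0));
      //   System.out.println(ripper);      // prints the learned ruleset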