📄 nbtreemodelselection.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    C45ModelSelection.java *    Copyright (C) 2004 Mark Hall * */package weka.classifiers.trees.j48;import java.util.*;import weka.core.*;/** * Class for selecting a NB tree split. * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.2 $ */public class NBTreeModelSelection extends ModelSelection {  /** Minimum number of objects in interval. */  private int m_minNoObj;                 /** All the training data */  private Instances m_allData; //   /**   * Initializes the split selection method with the given parameters.   *   * @param minNoObj minimum number of instances that have to occur in at least two   * subsets induced by split   * @param allData FULL training dataset (necessary for   * selection of split points).   */  public NBTreeModelSelection(int minNoObj, Instances allData) {    m_minNoObj = minNoObj;    m_allData = allData;  }  /**   * Sets reference to training data to null.   */  public void cleanup() {    m_allData = null;  }  /**   * Selects NBTree-type split for the given dataset.   */  public final ClassifierSplitModel selectModel(Instances data){    double globalErrors = 0;    double minResult;    double currentResult;    NBTreeSplit [] currentModel;    NBTreeSplit bestModel = null;    NBTreeNoSplit noSplitModel = null;    int validModels = 0;    boolean multiVal = true;    Distribution checkDistribution;    Attribute attribute;    double sumOfWeights;    int i;        try{      // build the global model at this node      noSplitModel = new NBTreeNoSplit();      noSplitModel.buildClassifier(data);      if (data.numInstances() < 5) {	return noSplitModel;      }      // evaluate it      globalErrors = noSplitModel.getErrors();      if (globalErrors == 0) {	return noSplitModel;      }      // Check if all Instances belong to one class or if not      // enough Instances to split.      checkDistribution = new Distribution(data);      if (Utils.sm(checkDistribution.total(), m_minNoObj) ||	  Utils.eq(checkDistribution.total(),		   checkDistribution.perClass(checkDistribution.maxClass()))) {	return noSplitModel;      }      // Check if all attributes are nominal and have a       // lot of values.      if (m_allData != null) {	Enumeration enu = data.enumerateAttributes();	while (enu.hasMoreElements()) {	  attribute = (Attribute) enu.nextElement();	  if ((attribute.isNumeric()) ||	      (Utils.sm((double)attribute.numValues(),			(0.3*(double)m_allData.numInstances())))){	    multiVal = false;	    break;	  }	}      }      currentModel = new NBTreeSplit[data.numAttributes()];      sumOfWeights = data.sumOfWeights();      // For each attribute.      for (i = 0; i < data.numAttributes(); i++){		// Apart from class attribute.	if (i != (data).classIndex()){	  	  // Get models for current attribute.	  currentModel[i] = new NBTreeSplit(i,m_minNoObj,sumOfWeights);	  currentModel[i].setGlobalModel(noSplitModel);	  currentModel[i].buildClassifier(data);	  	  // Check if useful split for current attribute	  // exists and check for enumerated attributes with 	  // a lot of values.	  if (currentModel[i].checkModel()){	    validModels++;	  }	} else {	  currentModel[i] = null;	}      }            // Check if any useful split was found.      if (validModels == 0) {	return noSplitModel;      }           // Find "best" attribute to split on.      minResult = globalErrors;      for (i=0;i<data.numAttributes();i++){	if ((i != (data).classIndex()) &&	    (currentModel[i].checkModel())) {	  /*  System.err.println("Errors for "+data.attribute(i).name()+" "+	      currentModel[i].getErrors()); */	  if (currentModel[i].getErrors() < minResult) {	    bestModel = currentModel[i];	    minResult = currentModel[i].getErrors();	  }	}      }      //      System.exit(1);      // Check if useful split was found.            if (((globalErrors - minResult) / globalErrors) < 0.05) {	return noSplitModel;      }            /*      if (bestModel == null) {	System.err.println("This shouldn't happen! glob : "+globalErrors+			   " minRes : "+minResult);	System.exit(1);	} */      // Set the global model for the best split      //      bestModel.setGlobalModel(noSplitModel);      return bestModel;    }catch(Exception e){      e.printStackTrace();    }    return null;  }  /**   * Selects NBTree-type split for the given dataset.   */  public final ClassifierSplitModel selectModel(Instances train, Instances test) {    return selectModel(train);  }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -