⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classifiertree.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    ClassifierTree.java
 *    Copyright (C) 1999 Eibe Frank
 *
 */

package weka.classifiers.trees.j48;

import java.io.Serializable;

import eti.bi.util.NumberFormatter;

import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;

/**
 * Class for handling a tree structure used for
 * classification.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class ClassifierTree implements Drawable, Serializable {

  /**
	 * 
	 */
	private static final long serialVersionUID = -6678862025743147742L;

/** The model selection method. */  
  protected ModelSelection m_toSelectModel;     

  /** Local model at node. */
  protected ClassifierSplitModel m_localModel;  

  /** References to sons. */
  protected ClassifierTree [] m_sons;           

  /** True if node is leaf. */
  protected boolean m_isLeaf;                   

  /** True if node is empty. */
  protected boolean m_isEmpty;                  

  /** The training instances. */
  protected Instances m_train;                  

  /** The pruning instances. */
  protected Distribution m_test;     

  /** The id for the node. */
  protected int m_id;

  /** 
   * For getting a unique ID when outputting the tree (hashcode isn't
   * guaranteed unique) 
   */
  private static long PRINTED_NODES = 0;
  
  /**
   * The content of the classifierTree node. If it is a leaf node, it is the 
   * class name (total records in the bag / total incorrectly predicted records
   * in the bag). 
   * 
   * ClassDisStr (presumably the m_NodeClassDist field below) is the class
   * distribution in this node, kept for visualization purposes.
   * 
   * m_ClassPro is the probability of classifying an instance into the most 
   * probable class.
   * 
   * By TWang. June 6, 2005. 
   */
  private String m_NodeContent; 
  private double[] m_NodeClassDist;
  private double m_ClassPro = -1;
  

  /**
   * Hands out the current value of the shared node counter and then
   * advances the counter by one.
   *
   * @return the counter value as it was before this call incremented it
   */
  protected static long nextID() {

    final long issued = PRINTED_NODES;
    PRINTED_NODES = issued + 1;
    return issued;
  }

  /**
   * Sets the shared node counter back to zero, so that the next call to
   * nextID() returns 0 again (e.g. between repeated, separate print runs).
   */
  protected static void resetID() {

    ClassifierTree.PRINTED_NODES = 0L;
  }

  /**
   * Creates a tree node that uses the given model selection method.
   * The constructor only stores the reference; no other field is touched.
   *
   * @param toSelectLocModel the model selection method kept in m_toSelectModel
   */
  public ClassifierTree(ModelSelection toSelectLocModel) {

    this.m_toSelectModel = toSelectLocModel;
  }

  /**
   * Entry point for building a classifier tree from a data set.
   *
   * The data is rejected when checkForStringAttributes() reports true.
   * Otherwise a new Instances object is created from it,
   * deleteWithMissingClass() is applied to that new object, and the tree is
   * built from it with keepData set to false.
   *
   * @param data the data to build the tree from
   * @exception UnsupportedAttributeTypeException if the data contains
   * string attributes
   * @exception Exception if something else goes wrong while building
   */
  public void buildClassifier(Instances data) throws Exception {

    if (data.checkForStringAttributes()) {
      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }

    // Work on a separate Instances object rather than on the caller's reference.
    Instances workingData = new Instances(data);
    workingData.deleteWithMissingClass();
    buildTree(workingData, false);
  }

  /**
   * Builds the tree structure rooted at this node.
   *
   * The node state is reset, a local model is requested from the model
   * selection method, and then the node either becomes a leaf (the model
   * has at most one subset) or gets one child per subset of the model.
   *
   * @param data the data for which the tree structure is to be
   * generated.
   * @param keepData if true, the data is stored in m_train
   * @exception Exception if something goes wrong
   */
  public void buildTree(Instances data, boolean keepData) throws Exception {

    if (keepData) {
      m_train = data;
    }

    // Reset the per-node state before (re)building.
    m_test = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    m_localModel = m_toSelectModel.selectModel(data);

    if (m_localModel.numSubsets() <= 1) {
      // Nothing to split on: this node is a leaf, and it is flagged as
      // empty when the total weight of its data compares equal to zero.
      m_isLeaf = true;
      m_isEmpty = Utils.eq(data.sumOfWeights(), 0);
      return;
    }

    Instances[] subsets = m_localModel.split(data);
    // Drop the local references as soon as they are no longer needed.
    data = null;
    m_sons = new ClassifierTree[m_localModel.numSubsets()];
    int child = 0;
    while (child < m_sons.length) {
      m_sons[child] = getNewTree(subsets[child]);
      subsets[child] = null;
      child++;
    }
  }
  
  /**
   * Tells whether this node is a leaf. TWang, Jan 26, 2005.
   * 
   * @return the current value of the m_isLeaf flag
   */
  public boolean isLeaf() {

    return this.m_isLeaf;
  }
  
  /**
   * Returns the child nodes of this node.
   * TWang, Jan 26, 2005.
   *
   * @return the m_sons array itself (not a copy); it is null when no
   * children have been created, e.g. for a node built as a leaf
   */
  public ClassifierTree[] getClassifierTreeChildren() {

    return this.m_sons;
  }

  /**
   * Builds the tree structure using a hold-out set.
   *
   * The local model is selected from both data sets, and the distribution
   * of the test data under that model is stored in m_test. The node then
   * either becomes a leaf (the model has at most one subset) or gets one
   * child per subset, each built from the matching train and test parts.
   *
   * @param train the data for which the tree structure is to be
   * generated.
   * @param test the test data for potential pruning
   * @param keepData if true, the training data is stored in m_train
   * @exception Exception if something goes wrong
   */
  public void buildTree(Instances train, Instances test, boolean keepData)
       throws Exception {

    if (keepData) {
      m_train = train;
    }

    // Reset the per-node state before (re)building.
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    m_localModel = m_toSelectModel.selectModel(train, test);
    m_test = new Distribution(test, m_localModel);

    if (m_localModel.numSubsets() <= 1) {
      // Nothing to split on: this node is a leaf, and it is flagged as
      // empty when the total weight of the training data compares equal
      // to zero.
      m_isLeaf = true;
      m_isEmpty = Utils.eq(train.sumOfWeights(), 0);
      return;
    }

    Instances[] trainParts = m_localModel.split(train);
    Instances[] testParts = m_localModel.split(test);
    // Drop the local references as soon as they are no longer needed.
    train = null;
    test = null;
    m_sons = new ClassifierTree[m_localModel.numSubsets()];
    for (int child = 0; child < m_sons.length; child++) {
      m_sons[child] = getNewTree(trainParts[child], testParts[child]);
      trainParts[child] = null;
      testParts[child] = null;
    }
  }

  /**
   * Classifies an instance by picking the class index with the highest
   * probability as reported by getProbs.
   *
   * As a side effect, the winning probability is stored in m_ClassPro.
   * When several classes tie (according to Utils.gr), the lowest index
   * among them is kept.
   *
   * @param instance the instance to classify
   * @return the index of the selected class, as a double
   * @exception Exception
   *                if something goes wrong
   */
  public double classifyInstance(Instance instance) 
    throws Exception {

    int bestClass = 0;
    double bestProb = -1;

    for (int cls = 0; cls < instance.numClasses(); cls++) {
      double prob = getProbs(cls, instance, 1);
      if (!Utils.gr(prob, bestProb)) {
        continue;
      }
      bestClass = cls;
      bestProb = prob;
    }

    // Remember the probability of the chosen class.
    m_ClassPro = bestProb;

    return bestClass;
  }
  
  
  

  /**
   * Cleanup in order to save memory: replaces the stored training data
   * with the given object, drops the stored test distribution, and does
   * the same recursively for all children.
   *
   * @param justHeaderInfo the object stored in m_train in place of the
   * training data, for this node and every descendant
   */
  public final void cleanup(Instances justHeaderInfo) {

    m_train = justHeaderInfo;
    m_test = null;
    // Check m_sons as well as m_isLeaf: a node that has not been built yet
    // has m_isLeaf == false and m_sons == null, and must not be dereferenced.
    if (!m_isLeaf && m_sons != null) {
      for (int i = 0; i < m_sons.length; i++) {
        m_sons[i].cleanup(justHeaderInfo);
      }
    }
  }

  /** 
   * Returns class probabilities for a weighted instance.
   *
   * One entry is computed per class of the instance, using
   * getProbsLaplace when useLaplace is true and getProbs otherwise; both
   * are called with a weight of 1.
   *
   * @param instance the instance to compute the probabilities for
   * @param useLaplace whether getProbsLaplace is used instead of getProbs
   * @return a new array with one probability per class index
   * @exception Exception if something goes wrong
   */
  public final double [] distributionForInstance(Instance instance,
                                                 boolean useLaplace) 
       throws Exception {

    final int classCount = instance.numClasses();
    double [] probs = new double[classCount];

    if (useLaplace) {
      for (int cls = 0; cls < classCount; cls++) {
        probs[cls] = getProbsLaplace(cls, instance, 1);
      }
    } else {
      for (int cls = 0; cls < classCount; cls++) {
        probs[cls] = getProbs(cls, instance, 1);
      }
    }

    return probs;
  }

  /**
   * Assigns a unique id to every node in the tree.
   *
   * This node receives lastID + 1; the children are then numbered one
   * after another, each continuing from the last id used by the previous
   * subtree.
   *
   * @param lastID the last id that has already been used
   * @return the last id assigned within this subtree
   */
  public int assignIDs(int lastID) {

    final int ownID = lastID + 1;
    m_id = ownID;

    if (m_sons == null) {
      return ownID;
    }

    int lastUsed = ownID;
    for (int child = 0; child < m_sons.length; child++) {
      lastUsed = m_sons[child].assignIDs(lastUsed);
    }
    return lastUsed;
  }

  /**
   * Reports which kind of graph this classifier produces.
   *
   * @return the constant Drawable.TREE
   */
  public int graphType() {

    return Drawable.TREE;
  }

  /**
   * Returns a graph describing the tree, as text starting with
   * "digraph J48Tree {".
   *
   * Node ids are (re)assigned first, starting from 0 at this node. The
   * entry for this node is written here; for a non-leaf node the rest of
   * the tree is appended by graphTree.
   *
   * @return the textual graph description
   * @exception Exception if something goes wrong
   */
  public String graph() throws Exception {

    StringBuffer dot = new StringBuffer();

    assignIDs(-1);
    dot.append("digraph J48Tree {\n");

    // Opening of the entry for this node; the label text depends on
    // whether the node is a leaf.
    dot.append("N").append(m_id).append(" [label=\"");
    if (m_isLeaf) {
      dot.append(m_localModel.dumpLabel(0, m_train))
         .append("\" ")
         .append("shape=box style=filled ");
    } else {
      dot.append(m_localModel.leftSide(m_train))
         .append("\" ");
    }

    // The stored training data is embedded only when there is any.
    if (m_train != null && m_train.numInstances() > 0) {
      dot.append("data =\n").append(m_train).append("\n");
      dot.append(",\n");
    }
    dot.append("]\n");

    if (!m_isLeaf) {
      graphTree(dot);
    }

    dot.append("}\n");
    return dot.toString();
  }

  /**
   * Returns the tree in prefix order.
   *
   * A leaf is rendered as its label enclosed in square brackets; for any
   * other node the text is produced by prefixTree.
   *
   * @return the prefix-order description of the tree
   * @exception Exception if something goes wrong
   */
  public String prefix() throws Exception {

    StringBuffer out = new StringBuffer();

    if (!m_isLeaf) {
      prefixTree(out);
    } else {
      out.append("[")
         .append(m_localModel.dumpLabel(0, m_train))
         .append("]");
    }

    return out.toString();
  }

  /**
   * Returns source code for the tree as an if-then statement. The 
   * class is assigned to variable "p", and assumes the tested 
   * instance is named "i". The results are returned as two stringbuffers: 
   * a section of code for assignment of the class, and a section of
   * code containing support code (eg: other support methods).
   *
   * @param className the classname that this static classifier has
   * @return an array containing two stringbuffers, the first string containing
   * assignment code, and the second containing source for support code.
   * @exception Exception if something goes wrong
   */
  public StringBuffer [] toSource(String className) throws Exception {
    
    StringBuffer [] result = new StringBuffer [2];
    if (m_isLeaf) {
      result[0] = new StringBuffer("    p = " 
	+ m_localModel.distribution().maxClass(0) + ";\n");
      result[1] = new StringBuffer("");
    } else {
      StringBuffer text = new StringBuffer();
      StringBuffer atEnd = new StringBuffer();

      long printID = ClassifierTree.nextID();

      text.append("  static double N") 
	.append(Integer.toHexString(m_localModel.hashCode()) + printID)
	.append("(Object []i) {\n")
	.append("    double p = Double.NaN;\n");

      text.append("    if (")
	.append(m_localModel.sourceExpression(-1, m_train))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -