⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classifiertree.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:

/**
 *
 *   AgentAcademy - an open source Data Mining framework for
 *   training intelligent agents
 *
 *   Copyright (C)   2001-2003 AA Consortium.
 *
 *   This library is open source software; you can redistribute it
 *   and/or modify it under the terms of the GNU Lesser General
 *   Public License as published by the Free Software Foundation;
 *   either version 2.0 of the License, or (at your option) any later
 *   version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free
 *   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *   MA  02111-1307 USA
 *
 */

package org.agentacademy.modules.dataminer.classifiers;

/**
 * <p>Title: The Data Miner prototype</p>
 * <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowle/pmmlDoc
 *
 * <p>Company: CERTH</p>
 * @author asymeon
 * @version 0.31
 */


import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;

import org.agentacademy.modules.dataminer.core.Drawable;
import org.agentacademy.modules.dataminer.core.FastVector;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Utils;
import org.jdom.DocType;
import org.jdom.Document;
import org.jdom.Element;
import org.apache.log4j.Logger;


/**
 * Class for handling a tree structure used for
 * classification.
 *
 */

 public class ClassifierTree implements Drawable, Serializable {


  public static Logger log = Logger.getLogger(ClassifierTree.class);

  /** The pmmlDocument Document */
  public static Document pmmlDocument = null;

  /** The model selection method. */
  protected ModelSelection m_toSelectModel;

  /** Local model at node. */
  protected ClassifierSplitModel m_localModel;

  /** References to sons. */
  protected ClassifierTree [] m_sons;

  /** True if node is leaf. */
  protected boolean m_isLeaf;

  /** True if node is empty. */
  protected boolean m_isEmpty;

  /** The training instances. */
  protected Instances m_train;

  /** The pruning instances. */
  protected Distribution m_test;

  /** The id for the node. */
  protected int m_id;
  protected Element currentNodeElement;

//  protected Element nextElement;
//  protected Element simplePredicateElement;
//  protected Element leafElement;
//  protected Element tempElement;

  protected static Vector ruleVector = new Vector();

  protected int vectorIndex = 0;

  /**
   * For getting a unique ID when outputting the tree (hashcode isn't
   * guaranteed unique)
   */
  private static long PRINTED_NODES = 0;

  /**
   * Gets the next unique node ID.
   *
   * @return the next unique node ID.
   */
  protected static long nextID() {

    return PRINTED_NODES ++;
  }

  /**
   * Resets the unique node ID counter (e.g.
   * between repeated separate print types)
   */
  protected static void resetID() {

    PRINTED_NODES = 0;
  }

  /**
   * Constructor.
   */
  public ClassifierTree(ModelSelection toSelectLocModel) {

    m_toSelectModel = toSelectLocModel;
  }

  /**
   * Method for building a classifier tree.
   *
   * @exception Exception if something goes wrong
   */
  public void buildClassifier(Instances data) throws Exception{

    if (data.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    buildTree(data, false);
  }

  /**
   * Builds the tree structure.
   *
   * @param data the data for which the tree structure is to be
   * generated.
   * @param keepData is training data to be kept?
   * @exception Exception if something goes wrong
   */
  public void buildTree(Instances data, boolean keepData) throws Exception{

    Instances [] localInstances;

    if (keepData) {
      m_train = data;
    }
    m_test = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    m_localModel = m_toSelectModel.selectModel(data);
    if (m_localModel.numSubsets() > 1) {
      localInstances = m_localModel.split(data);
      data = null;
      m_sons = new ClassifierTree [m_localModel.numSubsets()];
      for (int i = 0; i < m_sons.length; i++) {
	m_sons[i] = getNewTree(localInstances[i]);
	localInstances[i] = null;
      }
    }else{
      m_isLeaf = true;
      if (Utils.eq(data.sumOfWeights(), 0))
	m_isEmpty = true;
      data = null;
    }
  }

  /**
   * Builds the tree structure with hold out set
   *
   * @param train the data for which the tree structure is to be
   * generated.
   * @param test the test data for potential pruning
   * @param keepData is training Data to be kept?
   * @exception Exception if something goes wrong
   */
  public void buildTree(Instances train, Instances test, boolean keepData)
       throws Exception{

    Instances [] localTrain, localTest;
    int i;

    if (keepData) {
      m_train = train;
    }
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    m_localModel = m_toSelectModel.selectModel(train, test);
    m_test = new Distribution(test, m_localModel);
    if (m_localModel.numSubsets() > 1) {
      localTrain = m_localModel.split(train);
      localTest = m_localModel.split(test);
      train = test = null;
      m_sons = new ClassifierTree [m_localModel.numSubsets()];
      for (i=0;i<m_sons.length;i++) {
	m_sons[i] = getNewTree(localTrain[i], localTest[i]);
	localTrain[i] = null;
	localTest[i] = null;
      }
    }else{
      m_isLeaf = true;
      if (Utils.eq(train.sumOfWeights(), 0))
	m_isEmpty = true;
      train = test = null;
    }
  }

  /**
   * Classifies an instance.
   *
   * @exception Exception if something goes wrong
   */
  public double classifyInstance(Instance instance)
    throws Exception {

    double maxProb = -1;
    double currentProb;
    int maxIndex = 0;
    int j;

    for (j = 0; j < instance.numClasses(); j++) {
      currentProb = getProbs(j, instance, 1);
      if (Utils.gr(currentProb,maxProb)) {
	maxIndex = j;
	maxProb = currentProb;
      }
    }

    return (double)maxIndex;
  }

  /**
   * Cleanup in order to save memory.
   */
  public final void cleanup(Instances justHeaderInfo) {

    m_train = justHeaderInfo;
    m_test = null;
    if (!m_isLeaf)
      for (int i = 0; i < m_sons.length; i++)
	m_sons[i].cleanup(justHeaderInfo);
  }

  /**
   * Returns class probabilities for a weighted instance.
   *
   * @exception Exception if something goes wrong
   */
  public final double [] distributionForInstance(Instance instance,
						 boolean useLaplace)
       throws Exception {

    double [] doubles = new double[instance.numClasses()];

    for (int i = 0; i < doubles.length; i++) {
      if (!useLaplace) {
	doubles[i] = getProbs(i, instance, 1);
      } else {
	doubles[i] = getProbsLaplace(i, instance, 1);
      }
    }

    return doubles;
  }

  /**
   * Assigns a uniqe id to every node in the tree.
   */
  public int assignIDs(int lastID) {

    int currLastID = lastID + 1;

    m_id = currLastID;
    if (m_sons != null) {
      for (int i = 0; i < m_sons.length; i++) {
	currLastID = m_sons[i].assignIDs(currLastID);
      }
    }
    return currLastID;
  }

  /**
   * Returns graph describing the tree.
   *
   * @exception Exception if something goes wrong
   */
  public String graph() throws Exception {

    StringBuffer text = new StringBuffer();

    assignIDs(-1);
    text.append("digraph J48Tree {\n");
    if (m_isLeaf) {
      text.append("N" + m_id
		  + " [label=\"" +
		  m_localModel.dumpLabel(0,m_train) + "\" " +
		  "shape=box style=filled ");
      if (m_train != null && m_train.numInstances() > 0) {
	text.append("data =\n" + m_train + "\n");
	text.append(",\n");

      }
      text.append("]\n");
    }else {
      text.append("N" + m_id
		  + " [label=\"" +
		  m_localModel.leftSide(m_train) + "\" ");
      if (m_train != null && m_train.numInstances() > 0) {
	text.append("data =\n" + m_train + "\n");
	text.append(",\n");
     }
      text.append("]\n");
      graphTree(text);
    }

    return text.toString() +"}\n";
  }

  /**
   * Returns tree in prefix order.
   *
   * @exception Exception if something goes wrong
   */
  public String prefix() throws Exception {

    StringBuffer text;

    text = new StringBuffer();
    if (m_isLeaf) {
      text.append("["+m_localModel.dumpLabel(0,m_train)+"]");
    }else {
      prefixTree(text);
    }

    return text.toString();
  }

  /**
   * Returns source code for the tree as an if-then statement. The
   * class is assigned to variable "p", and assumes the tested
   * instance is named "i". The results are returned as two stringbuffers:
   * a section of code for assignment of the class, and a section of
   * code containing support code (eg: other support methods).

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -