📄 infogainsplitcrit.java

📁 一个数据挖掘系统的源码
💻 JAVA
字号:

/**
 *   
 *   AgentAcademy - an open source Data Mining framework for
 *   training intelligent agents
 *
 *   Copyright (C)   2001-2003 AA Consortium.
 *
 *   This library is open source software; you can redistribute it 
 *   and/or modify it under the terms of the GNU Lesser General 
 *   Public License as published by the Free Software Foundation;   
 *   either version 2.0 of the License, or (at your option) any later 
 *   version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free 
 *   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
 *   MA  02111-1307 USA
 * 
 */

package org.agentacademy.modules.dataminer.classifiers;

/**
 * <p>Title: The Data Miner prototype</p>
 * <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
 * <p>Copyright: Copyright (c) 2002</p>
 * <p>Company: CERTH</p>
 * @author asymeon
 * @version 0.3
 */

import org.agentacademy.modules.dataminer.core.*;

/**
 * Class for computing the information gain for a given distribution.
 */
public final class InfoGainSplitCrit extends EntropyBasedSplitCrit{

  /**
   * This method is a straightforward implementation of the information
   * gain criterion for the given distribution.
   */
  public final double splitCritValue(Distribution bags) {

    double numerator;

    numerator = oldEnt(bags)-newEnt(bags);

    // Splits with no gain are useless.
    if (Utils.eq(numerator,0))
      return Double.MAX_VALUE;

    // We take the reciprocal value because we want to minimize the
    // splitting criterion's value.
    return bags.total()/numerator;
  }

  /**
   * This method computes the information gain in the same way
   * C4.5 does.
   *
   * @param distribution the distribution
   * @param totalNoInst weight of ALL instances (including the
   * ones with missing values).
   */
  public final double splitCritValue(Distribution bags,double totalNoInst) {

    double numerator;
    double noUnknown;
    double unknownRate;
    int i;

    noUnknown = totalNoInst-bags.total();
    unknownRate = noUnknown/totalNoInst;
    numerator = (oldEnt(bags)-newEnt(bags));
    numerator = (1-unknownRate)*numerator;

    // Splits with no gain are useless.
    if (Utils.eq(numerator,0))
      return 0;

    return numerator/bags.total();
  }

  /**
   * This method computes the information gain in the same way
   * C4.5 does.
   *
   * @param distribution the distribution
   * @param totalNoInst weight of ALL instances
   * @param oldEnt entropy with respect to "no-split"-model.
   */
  public final double splitCritValue(Distribution bags,double totalNoInst,
                                     double oldEnt) {

    double numerator;
    double noUnknown;
    double unknownRate;
    int i;

    noUnknown = totalNoInst-bags.total();
    unknownRate = noUnknown/totalNoInst;
    numerator = (oldEnt-newEnt(bags));
    numerator = (1-unknownRate)*numerator;

    // Splits with no gain are useless.
    if (Utils.eq(numerator,0))
      return 0;

    return numerator/bags.total();
  }
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -