datanearbalancednd.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclip」· Java 代码 · 共 572 行 · 第 1/2 页
JAVA
572 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    DataNearBalancedND.java *    Copyright (C) 2005 University of Waikato * */package weka.classifiers.meta.nestedDichotomies;import weka.classifiers.Classifier;import weka.classifiers.RandomizableSingleClassifierEnhancer;import weka.classifiers.meta.FilteredClassifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Range;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.filters.Filter;import weka.filters.unsupervised.attribute.MakeIndicator;import weka.filters.unsupervised.instance.RemoveWithValues;import java.util.Hashtable;import java.util.Random;/** <!-- globalinfo-start --> * A meta classifier for handling multi-class datasets with 2-class classifiers by building a random data-balanced tree structure.<br/> * <br/> * For more info, check<br/> * <br/> * Lin Dong, Eibe Frank, Stefan Kramer: Ensembles of Balanced Nested Dichotomies for Multi-class Problems. In: PKDD, 84-95, 2005.<br/> * <br/> * Eibe Frank, Stefan Kramer: Ensembles of nested dichotomies for multi-class problems. 
In: Twenty-first International Conference on Machine Learning, 2004. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Dong2005, *    author = {Lin Dong and Eibe Frank and Stefan Kramer}, *    booktitle = {PKDD}, *    pages = {84-95}, *    publisher = {Springer}, *    title = {Ensembles of Balanced Nested Dichotomies for Multi-class Problems}, *    year = {2005} * } *  * &#64;inproceedings{Frank2004, *    author = {Eibe Frank and Stefan Kramer}, *    booktitle = {Twenty-first International Conference on Machine Learning}, *    publisher = {ACM}, *    title = {Ensembles of nested dichotomies for multi-class problems}, *    year = {2004} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -S &lt;num&gt; *  Random number seed. *  (default 1)</pre> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  * <pre> -W *  Full name of base classifier. *  (default: weka.classifiers.trees.J48)</pre> *  * <pre>  * Options specific to classifier weka.classifiers.trees.J48: * </pre> *  * <pre> -U *  Use unpruned tree.</pre> *  * <pre> -C &lt;pruning confidence&gt; *  Set confidence threshold for pruning. *  (default 0.25)</pre> *  * <pre> -M &lt;minimum number of instances&gt; *  Set minimum number of instances per leaf. *  (default 2)</pre> *  * <pre> -R *  Use reduced error pruning.</pre> *  * <pre> -N &lt;number of folds&gt; *  Set number of folds for reduced error *  pruning. One fold is used as pruning set. 
*  (default 3)</pre> *  * <pre> -B *  Use binary splits only.</pre> *  * <pre> -S *  Don't perform subtree raising.</pre> *  * <pre> -L *  Do not clean up after the tree has been built.</pre> *  * <pre> -A *  Laplace smoothing for predicted probabilities.</pre> *  * <pre> -Q &lt;seed&gt; *  Seed for random data shuffling (default 1).</pre> *  <!-- options-end --> * * @author Lin Dong * @author Eibe Frank */public class DataNearBalancedND   extends RandomizableSingleClassifierEnhancer  implements TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 5117477294209496368L;    /** The filtered classifier in which the base classifier is wrapped. */  protected FilteredClassifier m_FilteredClassifier;      /** The hashtable for this node. */  protected Hashtable m_classifiers=new Hashtable();  /** The first successor */  protected DataNearBalancedND m_FirstSuccessor = null;  /** The second successor */  protected DataNearBalancedND m_SecondSuccessor = null;    /** The classes that are grouped together at the current node */  protected Range m_Range = null;      /** Is Hashtable given from END? */  protected boolean m_hashtablegiven = false;      /**   * Constructor.   */  public DataNearBalancedND() {        m_Classifier = new weka.classifiers.trees.J48();  }    /**   * String describing default classifier.   *    * @return the default classifier classname   */  protected String defaultClassifierString() {        return "weka.classifiers.trees.J48";  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;    TechnicalInformation 	additional;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "Lin Dong and Eibe Frank and Stefan Kramer");    result.setValue(Field.TITLE, "Ensembles of Balanced Nested Dichotomies for Multi-class Problems");    result.setValue(Field.BOOKTITLE, "PKDD");    result.setValue(Field.YEAR, "2005");    result.setValue(Field.PAGES, "84-95");    result.setValue(Field.PUBLISHER, "Springer");    additional = result.add(Type.INPROCEEDINGS);    additional.setValue(Field.AUTHOR, "Eibe Frank and Stefan Kramer");    additional.setValue(Field.TITLE, "Ensembles of nested dichotomies for multi-class problems");    additional.setValue(Field.BOOKTITLE, "Twenty-first International Conference on Machine Learning");    additional.setValue(Field.YEAR, "2004");    additional.setValue(Field.PUBLISHER, "ACM");        return result;  }  /**   * Set hashtable from END.   *    * @param table the hashtable to use   */  public void setHashtable(Hashtable table) {    m_hashtablegiven = true;    m_classifiers = table;  }      /**   * Generates a classifier for the current node and proceeds recursively.   
*   * @param data contains the (multi-class) instances   * @param classes contains the indices of the classes that are present   * @param rand the random number generator to use   * @param classifier the classifier to use   * @param table the Hashtable to use   * @param instsNumAllClasses   * @throws Exception if anything goes worng   */  private void generateClassifierForNode(Instances data, Range classes,                                         Random rand, Classifier classifier, Hashtable table,                                         double[] instsNumAllClasses)     throws Exception {	    // Get the indices    int[] indices = classes.getSelection();    // Randomize the order of the indices    for (int j = indices.length - 1; j > 0; j--) {      int randPos = rand.nextInt(j + 1);      int temp = indices[randPos];      indices[randPos] = indices[j];      indices[j] = temp;    }    // Pick the classes for the current split    double total = 0;    for (int j = 0; j < indices.length; j++) {      total += instsNumAllClasses[indices[j]];    }    double halfOfTotal = total / 2;	    // Go through the list of classes until the either the left or    // right subset exceeds half the total weight    double sumLeft = 0, sumRight = 0;    int i = 0, j = indices.length - 1;    do {      if (i == j) {        if (rand.nextBoolean()) {          sumLeft += instsNumAllClasses[indices[i++]];        } else {          sumRight += instsNumAllClasses[indices[j--]];        }      } else {        sumLeft += instsNumAllClasses[indices[i++]];        sumRight += instsNumAllClasses[indices[j--]];      }    } while (Utils.sm(sumLeft, halfOfTotal) && Utils.sm(sumRight, halfOfTotal));    int first = 0, second = 0;    if (!Utils.sm(sumLeft, halfOfTotal)) {      first = i;    } else {      first = j + 1;    }    second = indices.length - first;    int[] firstInds = new int[first];    int[] secondInds = new int[second];    System.arraycopy(indices, 0, firstInds, 0, first);    
System.arraycopy(indices, first, secondInds, 0, second);    	    // Sort the indices (important for hash key)!    int[] sortedFirst = Utils.sort(firstInds);
datanearbalancednd.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 datanearbalancednd.java 源码文件，采用 Java 编程语言编写，共 572 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?