📄 part.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * PART.java * Copyright (C) 1999 Eibe Frank * */package weka.classifiers.rules;import weka.classifiers.Classifier;import weka.classifiers.rules.part.MakeDecList;import weka.classifiers.trees.j48.BinC45ModelSelection;import weka.classifiers.trees.j48.C45ModelSelection;import weka.classifiers.trees.j48.ModelSelection;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Summarizable;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for generating a PART decision list. Uses separate-and-conquer. Builds a partial C4.5 decision tree in each iteration and makes the "best" leaf into a rule.<br/> * <br/> * For more information, see:<br/> * <br/> * Eibe Frank, Ian H. Witten: Generating Accurate Rule Sets Without Global Optimization. In: Fifteenth International Conference on Machine Learning, 144-151, 1998. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Frank1998, * author = {Eibe Frank and Ian H. Witten}, * booktitle = {Fifteenth International Conference on Machine Learning}, * editor = {J. Shavlik}, * pages = {144-151}, * publisher = {Morgan Kaufmann}, * title = {Generating Accurate Rule Sets Without Global Optimization}, * year = {1998}, * PS = {http://www.cs.waikato.ac.nz/~eibe/pubs/ML98-57.ps.gz} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -C <pruning confidence> * Set confidence threshold for pruning. * (default 0.25)</pre> * * <pre> -M <minimum number of objects> * Set minimum number of objects per leaf. * (default 2)</pre> * * <pre> -R * Use reduced error pruning.</pre> * * <pre> -N <number of folds> * Set number of folds for reduced error * pruning. One fold is used as pruning set. * (default 3)</pre> * * <pre> -B * Use binary splits only.</pre> * * <pre> -U * Generate unpruned decision list.</pre> * * <pre> -Q <seed> * Seed for random data shuffling (default 1).</pre> * <!-- options-end --> * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.7 $ */public class PART extends Classifier implements OptionHandler, WeightedInstancesHandler, Summarizable, AdditionalMeasureProducer, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 8121455039782598361L; /** The decision list */ private MakeDecList m_root; /** Confidence level */ private float m_CF = 0.25f; /** Minimum number of objects */ private int m_minNumObj = 2; /** Use reduced error pruning? */ private boolean m_reducedErrorPruning = false; /** Number of folds for reduced error pruning. */ private int m_numFolds = 3; /** Binary splits on nominal attributes? */ private boolean m_binarySplits = false; /** Generate unpruned list? */ private boolean m_unpruned = false; /** The seed for random number generation. */ private int m_Seed = 1; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class for generating a PART decision list. Uses " + "separate-and-conquer. Builds a partial C4.5 decision tree " + "in each iteration and makes the \"best\" leaf into a rule.\n\n" + "For more information, see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Eibe Frank and Ian H. Witten"); result.setValue(Field.TITLE, "Generating Accurate Rule Sets Without Global Optimization"); result.setValue(Field.BOOKTITLE, "Fifteenth International Conference on Machine Learning"); result.setValue(Field.EDITOR, "J. Shavlik"); result.setValue(Field.YEAR, "1998"); result.setValue(Field.PAGES, "144-151"); result.setValue(Field.PUBLISHER, "Morgan Kaufmann"); result.setValue(Field.PS, "http://www.cs.waikato.ac.nz/~eibe/pubs/ML98-57.ps.gz"); return result; } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result; if (m_unpruned) result = new MakeDecList(null, m_minNumObj).getCapabilities(); else if (m_reducedErrorPruning) result = new MakeDecList(null, m_numFolds, m_minNumObj, m_Seed).getCapabilities(); else result = new MakeDecList(null, m_CF, m_minNumObj).getCapabilities(); return result; } /** * Generates the classifier. * * @param instances the data to train with * @throws Exception if classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); ModelSelection modSelection; if (m_binarySplits) modSelection = new BinC45ModelSelection(m_minNumObj, instances); else modSelection = new C45ModelSelection(m_minNumObj, instances); if (m_unpruned) m_root = new MakeDecList(modSelection, m_minNumObj); else if (m_reducedErrorPruning) m_root = new MakeDecList(modSelection, m_numFolds, m_minNumObj, m_Seed); else m_root = new MakeDecList(modSelection, m_CF, m_minNumObj); m_root.buildClassifier(instances); if (m_binarySplits) { ((BinC45ModelSelection)modSelection).cleanup(); } else { ((C45ModelSelection)modSelection).cleanup(); } } /** * Classifies an instance. * * @param instance the instance to classify * @return the classification * @throws Exception if instance can't be classified successfully */ public double classifyInstance(Instance instance) throws Exception { return m_root.classifyInstance(instance); } /** * Returns class probabilities for an instance. * * @param instance the instance to get the distribution for * @return the class probabilities * @throws Exception if the distribution can't be computed successfully */ public final double [] distributionForInstance(Instance instance) throws Exception { return m_root.distributionForInstance(instance); } /** * Returns an enumeration describing the available options. * * Valid options are: <p> * * -C confidence <br> * Set confidence threshold for pruning. (Default: 0.25) <p> * * -M number <br> * Set minimum number of instances per leaf. (Default: 2) <p> * * -R <br> * Use reduced error pruning. <p> * * -N number <br> * Set number of folds for reduced error pruning. One fold is * used as the pruning set. (Default: 3) <p> * * -B <br> * Use binary splits for nominal attributes. <p> * * -U <br> * Generate unpruned decision list. <p> * * -Q <br> * The seed for reduced-error pruning. <p> * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(7); newVector. addElement(new Option("\tSet confidence threshold for pruning.\n" + "\t(default 0.25)", "C", 1, "-C <pruning confidence>")); newVector. addElement(new Option("\tSet minimum number of objects per leaf.\n" + "\t(default 2)", "M", 1, "-M <minimum number of objects>")); newVector. addElement(new Option("\tUse reduced error pruning.", "R", 0, "-R")); newVector. addElement(new Option("\tSet number of folds for reduced error\n" + "\tpruning. One fold is used as pruning set.\n" + "\t(default 3)", "N", 1, "-N <number of folds>")); newVector. addElement(new Option("\tUse binary splits only.", "B", 0, "-B")); newVector. addElement(new Option("\tGenerate unpruned decision list.", "U", 0, "-U")); newVector. addElement(new Option("\tSeed for random data shuffling (default 1).", "Q", 1, "-Q <seed>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -C <pruning confidence> * Set confidence threshold for pruning. * (default 0.25)</pre> * * <pre> -M <minimum number of objects> * Set minimum number of objects per leaf. * (default 2)</pre> * * <pre> -R * Use reduced error pruning.</pre> * * <pre> -N <number of folds> * Set number of folds for reduced error * pruning. One fold is used as pruning set. * (default 3)</pre> * * <pre> -B * Use binary splits only.</pre> * * <pre> -U * Generate unpruned decision list.</pre> * * <pre> -Q <seed> * Seed for random data shuffling (default 1).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -