📄 olm.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * OLM.java * Copyright (C) 2004 Stijn Lievens * */package weka.classifiers.misc;import weka.classifiers.RandomizableClassifier;import weka.classifiers.misc.monotone.Coordinates;import weka.classifiers.misc.monotone.DiscreteDistribution;import weka.classifiers.misc.monotone.EnumerationIterator;import weka.classifiers.misc.monotone.InstancesComparator;import weka.classifiers.misc.monotone.InstancesUtil;import weka.classifiers.misc.monotone.MultiDimensionalSort;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.estimators.DiscreteEstimator;import java.util.ArrayList;import java.util.Comparator;import java.util.Enumeration;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * This class is an implementation of the Ordinal Learning Method<br/> * Further information regarding the algorithm and 
variants can be found in:<br/> * <br/> * Arie Ben-David (1992). Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications. Decision Sciences. 23:1357-1372.<br/> * <br/> * Lievens, Stijn (2003-2004). Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @article{Ben-David1992, * author = {Arie Ben-David}, * journal = {Decision Sciences}, * pages = {1357-1372}, * title = {Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications}, * volume = {23}, * year = {1992} * } * * @mastersthesis{Lievens2003-2004, * author = {Lievens, Stijn}, * school = {Ghent University}, * title = {Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.}, * year = {2003-2004} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -C <CL|REG> * Sets the classification type to be used. * (Default: REG)</pre> * * <pre> -A <MEAN|MED|MAX> * Sets the averaging type used in phase 1 of the classifier. * (Default: MEAN)</pre> * * <pre> -N <NONE|EUCL|HAM> * If different from NONE, a nearest neighbour rule is fired when the * rule base doesn't contain an example smaller than the instance * to be classified * (Default: NONE).</pre> * * <pre> -E <MIN|MAX|BOTH> * Sets the extension type, i.e. the rule base to use. 
* (Default: MIN)</pre>
 *
 * <pre> -sort
 * If set, the instances are also sorted within the same class
 * before building the rule bases</pre>
 * <!-- options-end -->
 *
 * @author Stijn Lievens (stijn.lievens@ugent.be)
 * @version $Revision: 1.1 $
 */
public class OLM extends RandomizableClassifier
  implements TechnicalInformationHandler {

  /** for serialization */
  private static final long serialVersionUID = 3722951802290935192L;

  /**
   * Classification type: round the real value that is returned by the
   * original algorithm to the nearest label.
   */
  public static final int CT_ROUNDED = 0;

  /**
   * Classification type: no rounding is performed during classification,
   * that is, the classification is done in a regression-like way.
   */
  public static final int CT_REAL = 1;

  /**
   * The classification types. The option strings are "CL" (CT_ROUNDED)
   * and "REG" (CT_REAL).
   */
  public static final Tag[] TAGS_CLASSIFICATIONTYPES = {
    new Tag(CT_ROUNDED, "CL", "Round to nearest label"),
    new Tag(CT_REAL, "REG", "Regression-like classification")
  };

  /**
   * Averaging type: use the mean for averaging in phase 1. This is in
   * fact a non-ordinal procedure. The scores used for averaging are the
   * internal values of WEKA.
   */
  public static final int AT_MEAN = 0;

  /**
   * Averaging type: use the median for averaging in phase 1. The possible
   * values are in the extended set of labels, that is, labels in between
   * the original labels are possible.
   */
  public static final int AT_MEDIAN = 1;

  /**
   * Averaging type: use the mode for averaging in phase 1. The label
   * that has maximum frequency is used. If there is more
   * than one label that has maximum frequency, the lowest
   * one is preferred.
   */
  public static final int AT_MAXPROB = 2;

  /**
   * The averaging types. The option strings are "MEAN" (AT_MEAN),
   * "MED" (AT_MEDIAN) and "MAX" (AT_MAXPROB).
   */
  public static final Tag[] TAGS_AVERAGINGTYPES = {
    new Tag(AT_MEAN, "MEAN", "Mean"),
    new Tag(AT_MEDIAN, "MED","Median"),
    new Tag(AT_MAXPROB, "MAX", "Max probability")
  };

  /**
   * Distance type: no nearest neighbour rule will be fired when
   * classifying an instance for which there is no smaller rule
   * in the rule base.
   */
  public static final int DT_NONE = -1;

  /**
   * Distance type: use the Euclidean distance whenever a nearest
   * neighbour rule is fired.
*/
  public static final int DT_EUCLID = 0;

  /**
   * Distance type: use the Hamming distance, that is, the number of
   * positions in which the instances differ, whenever a
   * nearest neighbour rule is fired.
   */
  public static final int DT_HAMMING = 1;

  /**
   * The distance types. The option strings are "NONE" (DT_NONE),
   * "EUCL" (DT_EUCLID) and "HAM" (DT_HAMMING).
   */
  public static final Tag[] TAGS_DISTANCETYPES = {
    new Tag(DT_NONE, "NONE", "No nearest neighbor"),
    new Tag(DT_EUCLID, "EUCL", "Euclidean"),
    new Tag(DT_HAMMING, "HAM", "Hamming")
  };

  /**
   * Extension type: use only the minimal extension, as in the original
   * algorithm of Ben-David.
   */
  public static final int ET_MIN = 0;

  /**
   * Extension type: use only the maximal extension. In this case an
   * algorithm dual to the original one is performed.
   */
  public static final int ET_MAX = 1;

  /**
   * Extension type: combine both the minimal and maximal extension, and
   * use the midpoint of the resulting interval as prediction.
   */
  public static final int ET_BOTH = 2;

  /**
   * The extension types. The option strings are "MIN" (ET_MIN),
   * "MAX" (ET_MAX) and "BOTH" (ET_BOTH).
   */
  public static final Tag[] TAGS_EXTENSIONTYPES = {
    new Tag(ET_MIN, "MIN", "Minimal extension"),
    new Tag(ET_MAX, "MAX", "Maximal extension"),
    new Tag(ET_BOTH, "BOTH", "Minimal and maximal extension")
  };

  /**
   * The training examples, used temporarily.
   * m_train is cleared after the rule base is built.
   */
  private Instances m_train;

  /** Number of classes in the original m_train */
  private int m_numClasses;

  /**
   * The rule base, should be consistent and contain no
   * redundant rules. This is the rule base as in the original
   * algorithm of Ben-David.
   */
  private Instances m_baseMin;

  /**
   * This is a complementary rule base, using the maximal rather
   * than the minimal extension.
   */
  private Instances m_baseMax;

  /**
   * Map used in the method buildClassifier in order to quickly
   * gather all info needed for phase 1. This is a map containing
   * (Coordinates, DiscreteEstimator)-pairs.
*/ private Map m_estimatedDistributions; /** classification type */ private int m_ctype = CT_REAL; /** averaging type */ private int m_atype = AT_MEAN; /** distance type */ private int m_dtype = DT_EUCLID; /** mode type */ private int m_etype = ET_MIN; /** * Should the instances be sorted such that minimal (resp. maximal) * elements (per class) are treated first when building m_baseMin * (resp. m_baseMax). */ private boolean m_sort = false; /** * Returns a string describing the classifier. * @return a description suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "This class is an implementation of the Ordinal Learning " + "Method\n" + "Further information regarding the algorithm and variants " + "can be found in:\n\n" + getTechnicalInformation().toString(); } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); // instances result.setMinimumNumberInstances(0); return result; } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. 
* * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "Arie Ben-David"); result.setValue(Field.YEAR, "1992"); result.setValue(Field.TITLE, "Automatic Generation of Symbolic Multiattribute Ordinal Knowledge-Based DSSs: methodology and Applications"); result.setValue(Field.JOURNAL, "Decision Sciences"); result.setValue(Field.PAGES, "1357-1372"); result.setValue(Field.VOLUME, "23"); additional = result.add(Type.MASTERSTHESIS); additional.setValue(Field.AUTHOR, "Lievens, Stijn"); additional.setValue(Field.YEAR, "2003-2004"); additional.setValue(Field.TITLE, "Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken."); additional.setValue(Field.SCHOOL, "Ghent University"); return result; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String classificationTypeTipText() { return "Sets the classification type."; } /** * Sets the classification type. * * @param value the classification type to be set. */ public void setClassificationType(SelectedTag value) { if (value.getTags() == TAGS_CLASSIFICATIONTYPES) m_ctype = value.getSelectedTag().getID(); } /** * Gets the classification type. * * @return the classification type */ public SelectedTag getClassificationType() { return new SelectedTag(m_ctype, TAGS_CLASSIFICATIONTYPES); } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String averagingTypeTipText() { return "Choses the way in which the distributions are averaged in " + "the first phase of the algorithm."; } /** * Sets the averaging type to use in phase 1 of the algorithm. 
* * @param value the averaging type to use */ public void setAveragingType(SelectedTag value) { if (value.getTags() == TAGS_AVERAGINGTYPES) m_atype = value.getSelectedTag().getID(); } /** * Gets the averaging type. * * @return the averaging type */ public SelectedTag getAveragingType() { return new SelectedTag(m_atype, TAGS_AVERAGINGTYPES); } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String distanceTypeTipText() { return "Sets the distance that is to be used by the nearest neighbour " + "rule"; } /** * Sets the distance type to be used by a nearest neighbour rule (if any). * * @param value the distance type to use */ public void setDistanceType(SelectedTag value) { if (value.getTags() == TAGS_DISTANCETYPES) m_dtype = value.getSelectedTag().getID(); } /** * Gets the distance type used by a nearest neighbour rule (if any). * * @return the distance type */ public SelectedTag getDistanceType() { return new SelectedTag(m_dtype, TAGS_DISTANCETYPES); } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String extensionTypeTipText() { return "Sets the extension type to use."; } /** * Sets the extension type to use. * The minimal extension is the one used by * Ben-David in the original algorithm. The maximal extension is * a completely dual variant of the minimal extension. When using * both, then the midpoint of the interval determined by both * extensions is returned. * * @param value the extension type to use */ public void setExtensionType(SelectedTag value) { if (value.getTags() == TAGS_EXTENSIONTYPES) m_etype = value.getSelectedTag().getID(); } /** * Gets the extension type. * * @return the extension type */ public SelectedTag getExtensionType() { return new SelectedTag(m_etype, TAGS_EXTENSIONTYPES); } /** * Returns the tip text for this property. 
*
   * @return tip text for this property, suitable for display in the
   *         explorer/experimenter gui
   */
  public String sortTipText() {
    final String tip = "If true, the instances are also sorted within the "
      + "classes prior to building the rule bases.";
    return tip;
  }

  /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -