📄 osdlcore.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * OSDLCore.java * Copyright (C) 2004 Stijn Lievens */package weka.classifiers.misc.monotone;import weka.classifiers.Classifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.estimators.DiscreteEstimator;import java.util.Arrays;import java.util.Enumeration;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Vector;/** <!-- globalinfo-start --> * This class is an implementation of the Ordinal Stochastic Dominance Learner.<br/> * Further information regarding the OSDL-algorithm can be found in:<br/> * <br/> * S. Lievens, B. De Baets, K. Cao-Van (2006). A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting. Annals of Operations Research..<br/> * <br/> * Kim Cao-Van (2003). Supervised ranking: from semantics to algorithms.<br/> * <br/> * Stijn Lievens (2004). 
Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.<br/> * <br/> * For more information about supervised ranking, see<br/> * <br/> * http://users.ugent.be/~slievens/supervised_ranking.php * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @article{Lievens2006, * author = {S. Lievens and B. De Baets and K. Cao-Van}, * journal = {Annals of Operations Research}, * title = {A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting}, * year = {2006} * } * * @phdthesis{Cao-Van2003, * author = {Kim Cao-Van}, * school = {Ghent University}, * title = {Supervised ranking: from semantics to algorithms}, * year = {2003} * } * * @mastersthesis{Lievens2004, * author = {Stijn Lievens}, * school = {Ghent University}, * title = {Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken}, * year = {2004} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -C <REG|WSUM|MAX|MED|RMED> * Sets the classification type to be used. 
* (Default: MED)</pre>
 *
 * <pre> -B
 *  Use the balanced version of the Ordinal Stochastic Dominance Learner</pre>
 *
 * <pre> -W
 *  Use the weighted version of the Ordinal Stochastic Dominance Learner</pre>
 *
 * <pre> -S <value of interpolation parameter>
 *  Sets the value of the interpolation parameter (not with -W/T/P/L/U)
 *  (default: 0.5).</pre>
 *
 * <pre> -T
 *  Tune the interpolation parameter (not with -W/S)
 *  (default: off)</pre>
 *
 * <pre> -L <Lower bound for interpolation parameter>
 *  Lower bound for the interpolation parameter (not with -W/S)
 *  (default: 0)</pre>
 *
 * <pre> -U <Upper bound for interpolation parameter>
 *  Upper bound for the interpolation parameter (not with -W/S)
 *  (default: 1)</pre>
 *
 * <pre> -P <Number of parts>
 *  Determines the step size for tuning the interpolation
 *  parameter, nl. (U-L)/P (not with -W/S)
 *  (default: 10)</pre>
 *
 <!-- options-end -->
 *
 * @author Stijn Lievens (stijn.lievens@ugent.be)
 * @version $Revision: 1.1 $
 */
public abstract class OSDLCore extends Classifier implements TechnicalInformationHandler {

  /** Version identifier used for serialization. */
  private static final long serialVersionUID = -9209888846680062897L;

  /**
   * Constant indicating that the classification type is regression:
   * <code>classifyInstance</code> returns the mean of the predicted
   * distribution as is, without rounding it to a class label.
   */
  public static final int CT_REGRESSION = 0;

  /**
   * Constant indicating that the classification type is the probabilistic
   * weighted sum: <code>classifyInstance</code> returns the mean of the
   * predicted distribution, rounded with <code>Math.round</code>.
   * This is the rounded counterpart of <code>CT_REGRESSION</code>.
   */
  public static final int CT_WEIGHTED_SUM = 1;

  /**
   * Constant indicating that the classification type is the mode of the
   * distribution. <code>classifyInstance</code> uses the first entry of
   * the array of modes reported by the predicted distribution.
   */
  public static final int CT_MAXPROB = 2;

  /**
   * Constant indicating that the classification type is the median:
   * <code>classifyInstance</code> returns the median of the predicted
   * distribution, rounded with <code>Math.round</code>.
   */
  public static final int CT_MEDIAN = 3;

  /**
   * Constant indicating that the classification type is
   * the median, but not rounded to the nearest class.
*/
  public static final int CT_MEDIAN_REAL = 4;

  /**
   * The classification types. Each tag couples one of the CT_* constants
   * with a short identifier and a readable description.
   */
  public static final Tag[] TAGS_CLASSIFICATIONTYPES = {
    new Tag(CT_REGRESSION, "REG", "Regression"),
    new Tag(CT_WEIGHTED_SUM, "WSUM", "Weighted Sum"),
    new Tag(CT_MAXPROB, "MAX", "Maximum probability"),
    new Tag(CT_MEDIAN, "MED", "Median"),
    new Tag(CT_MEDIAN_REAL, "RMED", "Median without rounding")
  };

  /**
   * The classification type (one of the CT_* constants),
   * by default set to CT_MEDIAN.
   */
  private int m_ctype = CT_MEDIAN;

  /**
   * The training examples.
   */
  private Instances m_train;

  /**
   * Collection of (Coordinates,DiscreteEstimator) pairs.
   * This Map is built from the training examples.
   * The DiscreteEstimator is over the classes.
   * Each DiscreteEstimator indicates how many training examples
   * there are with the specified classes.
   */
  private Map m_estimatedDistributions;

  /**
   * Collection of (Coordinates,CumulativeDiscreteDistribution) pairs.
   * This Map is built from the training examples, and more
   * specifically from <code> m_estimatedDistributions </code>.
   */
  private Map m_estimatedCumulativeDistributions;

  /**
   * The interpolation parameter s.
   * By default set to 1/2.
   */
  private double m_s = 0.5;

  /**
   * Lower bound for the interpolation parameter s.
   * Default value is 0.
   */
  private double m_sLower = 0.;

  /**
   * Upper bound for the interpolation parameter s.
   * Default value is 1.
   */
  private double m_sUpper = 1.0;

  /**
   * The number of parts the interval [m_sLower,m_sUpper] is
   * divided in, while searching for the best parameter s.
   * This thus determines the granularity of the search.
   * m_sNrParts + 1 values of the interpolation parameter will
   * be tested.
   */
  private int m_sNrParts = 10;

  /**
   * Indicates whether the interpolation parameter is to be tuned
   * using leave-one-out cross validation. <code> true </code> if
   * this is the case (default is <code> false </code>).
   */
  private boolean m_tuneInterpolationParameter = false;

  /**
   * Indicates whether the current value of the interpolation parameter
   * is valid. More specifically, if <code>
   * m_tuneInterpolationParameter == true </code> and
   * <code> m_interpolationParameterValid == false </code>,
   * this means that the current interpolation parameter is not valid.
   * This flag is only relevant if <code> m_tuneInterpolationParameter
   * == true </code>.
   *
   * If <code> m_tuneInterpolationParameter </code> and <code>
   * m_interpolationParameterValid </code> are both <code> true </code>,
   * then <code> m_s </code> should always be between
   * <code> m_sLower </code> and <code> m_sUpper </code>.
   */
  private boolean m_interpolationParameterValid = false;

  /**
   * Flag to switch between balanced and unbalanced OSDL.
   * <code> true </code> means that one chooses balanced OSDL
   * (default: <code> false </code>).
   */
  private boolean m_balanced = false;

  /**
   * Flag to choose the weighted variant of the OSDL algorithm
   * (default: <code> false </code>).
   */
  private boolean m_weighted = false;

  /**
   * Coordinates representing the smallest element of the data space.
   */
  private Coordinates smallestElement;

  /**
   * Coordinates representing the biggest element of the data space.
   */
  private Coordinates biggestElement;

  /**
   * Returns a string describing the classifier. The text embeds the
   * string form of <code> getTechnicalInformation() </code> between a
   * short introduction and a pointer to the supervised ranking web page.
   *
   * @return a description suitable for displaying in the
   * explorer/experimenter gui
   */
  public String globalInfo() {
    return "This class is an implementation of the Ordinal Stochastic "
      + "Dominance Learner.\n"
      + "Further information regarding the OSDL-algorithm can be found in:\n\n"
      + getTechnicalInformation().toString() + "\n\n"
      + "For more information about supervised ranking, see\n\n"
      + "http://users.ugent.be/~slievens/supervised_ranking.php";
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
* * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "S. Lievens and B. De Baets and K. Cao-Van"); result.setValue(Field.YEAR, "2006"); result.setValue(Field.TITLE, "A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting"); result.setValue(Field.JOURNAL, "Annals of Operations Research"); additional = result.add(Type.PHDTHESIS); additional.setValue(Field.AUTHOR, "Kim Cao-Van"); additional.setValue(Field.YEAR, "2003"); additional.setValue(Field.TITLE, "Supervised ranking: from semantics to algorithms"); additional.setValue(Field.SCHOOL, "Ghent University"); additional = result.add(Type.MASTERSTHESIS); additional.setValue(Field.AUTHOR, "Stijn Lievens"); additional.setValue(Field.YEAR, "2004"); additional.setValue(Field.TITLE, "Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken"); additional.setValue(Field.SCHOOL, "Ghent University"); return result; } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); // instances result.setMinimumNumberInstances(0); return result; } /** * Classifies a given instance using the current settings * of the classifier. * * @param instance the instance to be classified * @throws Exception if for some reason no distribution * could be predicted * @return the classification for the instance. Depending on the * settings of the classifier this is a double representing * a classlabel (internal WEKA format) or a real value in the sense * of regression. 
*/ public double classifyInstance(Instance instance) throws Exception { try { return classifyInstance(instance, m_s, m_ctype); } catch (IllegalArgumentException e) { throw new AssertionError(e); } } /** * Classifies a given instance using the settings in the paramater * list. This doesn't change the internal settings of the classifier. * In particular the interpolationparameter <code> m_s </code> * and the classification type <code> m_ctype </code> are not changed. * * @param instance the instance to be classified * @param s the value of the interpolationparameter to be used * @param ctype the classification type to be used * @throws IllegalStateException for some reason no distribution * could be predicted * @throws IllegalArgumentException if the interpolation parameter or the * classification type is not valid * @return the label assigned to the instance. It is given in internal floating point format. */ private double classifyInstance(Instance instance, double s, int ctype) throws IllegalArgumentException, IllegalStateException { if (s < 0 || s > 1) { throw new IllegalArgumentException("Interpolation parameter is not valid " + s); } DiscreteDistribution dist = null; if (!m_balanced) { dist = distributionForInstance(instance, s); } else { dist = distributionForInstanceBalanced(instance, s); } if (dist == null) { throw new IllegalStateException("Null distribution predicted"); } double value = 0; switch(ctype) { case CT_REGRESSION: case CT_WEIGHTED_SUM: value = dist.mean(); if (ctype == CT_WEIGHTED_SUM) { value = Math.round(value); } break; case CT_MAXPROB: value = dist.modes()[0]; break; case CT_MEDIAN: case CT_MEDIAN_REAL: value = dist.median(); if (ctype == CT_MEDIAN) { value = Math.round(value); } break; default: throw new IllegalArgumentException("Not a valid classification type!"); } return value; } /** * Calculates the class probabilities for the given test instance. * Uses the current settings of the parameters if these are valid. 
* If necessary it updates the interpolationparameter first, and hence * this may change the classifier. * * @param instance the instance to be classified
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -