⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 osdlcore.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    OSDLCore.java *    Copyright (C) 2004 Stijn Lievens */package weka.classifiers.misc.monotone;import weka.classifiers.Classifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.estimators.DiscreteEstimator;import java.util.Arrays;import java.util.Enumeration;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Vector;/** <!-- globalinfo-start --> * This class is an implementation of the Ordinal Stochastic Dominance Learner.<br/> * Further information regarding the OSDL-algorithm can be found in:<br/> * <br/> * S. Lievens, B. De Baets, K. Cao-Van (2006). A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting. Annals of Operations Research..<br/> * <br/> * Kim Cao-Van (2003). Supervised ranking: from semantics to algorithms.<br/> * <br/> * Stijn Lievens (2004). 
Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.<br/> * <br/> * For more information about supervised ranking, see<br/> * <br/> * http://users.ugent.be/~slievens/supervised_ranking.php * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;article{Lievens2006, *    author = {S. Lievens and B. De Baets and K. Cao-Van}, *    journal = {Annals of Operations Research}, *    title = {A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting}, *    year = {2006} * } *  * &#64;phdthesis{Cao-Van2003, *    author = {Kim Cao-Van}, *    school = {Ghent University}, *    title = {Supervised ranking: from semantics to algorithms}, *    year = {2003} * } *  * &#64;mastersthesis{Lievens2004, *    author = {Stijn Lievens}, *    school = {Ghent University}, *    title = {Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken}, *    year = {2004} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  * <pre> -C &lt;REG|WSUM|MAX|MED|RMED&gt; *  Sets the classification type to be used. 
*  (Default: MED)</pre>
 * 
 * <pre> -B
 *  Use the balanced version of the Ordinal Stochastic Dominance Learner</pre>
 * 
 * <pre> -W
 *  Use the weighted version of the Ordinal Stochastic Dominance Learner</pre>
 * 
 * <pre> -S &lt;value of interpolation parameter&gt;
 *  Sets the value of the interpolation parameter (not with -W/T/P/L/U)
 *  (default: 0.5).</pre>
 * 
 * <pre> -T
 *  Tune the interpolation parameter (not with -W/S)
 *  (default: off)</pre>
 * 
 * <pre> -L &lt;Lower bound for interpolation parameter&gt;
 *  Lower bound for the interpolation parameter (not with -W/S)
 *  (default: 0)</pre>
 * 
 * <pre> -U &lt;Upper bound for interpolation parameter&gt;
 *  Upper bound for the interpolation parameter (not with -W/S)
 *  (default: 1)</pre>
 * 
 * <pre> -P &lt;Number of parts&gt;
 *  Determines the step size for tuning the interpolation
 *  parameter, nl. (U-L)/P (not with -W/S)
 *  (default: 10)</pre>
 * 
 * <!-- options-end -->
 *
 * @author Stijn Lievens (stijn.lievens@ugent.be)
 * @version $Revision: 1.1 $
 */
public abstract class OSDLCore
  extends Classifier 
  implements TechnicalInformationHandler {

  /** for serialization */
  private static final long serialVersionUID = -9209888846680062897L;

  /**
   * Constant indicating that the classification type is
   * regression: the mean of the predicted distribution is returned
   * as is, without rounding it to a class label.
   */
  public static final int CT_REGRESSION = 0;

  /**
   * Constant indicating that the classification type is
   * the probabilistic weighted sum: the mean of the predicted
   * distribution, rounded with <code> Math.round </code>.
   */
  public static final int CT_WEIGHTED_SUM = 1;

  /**
   * Constant indicating that the classification type is
   * the mode of the distribution (the first mode reported by the
   * distribution is used).
   */
  public static final int CT_MAXPROB = 2;

  /**
   * Constant indicating that the classification type is
   * the median, rounded with <code> Math.round </code>.
   */
  public static final int CT_MEDIAN = 3;

  /**
   *  Constant indicating that the classification type is
   *  the median, but not rounded to the nearest class.
*/
  public static final int CT_MEDIAN_REAL = 4;

  /**
   * The classification types, pairing each <code> CT_* </code> constant
   * with its option string and a readable description.
   */
  public static final Tag[] TAGS_CLASSIFICATIONTYPES = {
    new Tag(CT_REGRESSION, "REG", "Regression"),
    new Tag(CT_WEIGHTED_SUM, "WSUM", "Weighted Sum"),
    new Tag(CT_MAXPROB, "MAX", "Maximum probability"),
    new Tag(CT_MEDIAN, "MED", "Median"),
    new Tag(CT_MEDIAN_REAL, "RMED", "Median without rounding")
  };

  /**
   * The classification type, by default set to CT_MEDIAN.
   */
  private int m_ctype = CT_MEDIAN;

  /**
   * The training examples.
   */
  private Instances m_train;

  /**
   * Collection of (Coordinates,DiscreteEstimator) pairs.
   * This Map is built from the training examples.
   * The DiscreteEstimator is over the classes.
   * Each DiscreteEstimator indicates how many training examples
   * there are with the specified classes.
   */
  private Map m_estimatedDistributions;

  /**
   * Collection of (Coordinates,CumulativeDiscreteDistribution) pairs.
   * This Map is built from the training examples, and more
   * specifically from <code> m_estimatedDistributions </code>.
   */
  private Map m_estimatedCumulativeDistributions;

  /**
   * The interpolation parameter s.
   * By default set to 1/2.
   */
  private double m_s = 0.5;

  /**
   * Lower bound for the interpolation parameter s.
   * Default value is 0.
   */
  private double m_sLower = 0.;

  /**
   * Upper bound for the interpolation parameter s.
   * Default value is 1.
   */
  private double m_sUpper = 1.0;

  /**
   * The number of parts the interval [m_sLower,m_sUpper] is
   * divided in, while searching for the best parameter s.
   * This thus determines the granularity of the search.
   * m_sNrParts + 1 values of the interpolation parameter will
   * be tested.
   */
  private int m_sNrParts = 10;

  /**
   * Indicates whether the interpolation parameter is to be tuned
   * using leave-one-out cross validation.  <code> true </code> if
   * this is the case (default is <code> false </code>).
*/
  private boolean m_tuneInterpolationParameter = false;

  /**
   * Indicates whether the current value of the interpolation parameter
   * is valid.  More specifically if <code>
   * m_tuneInterpolationParameter == true </code>, and
   * <code> m_interpolationParameterValid == false </code>,
   * this means that the current interpolation parameter is not valid.
   * This field is only relevant if <code> m_tuneInterpolationParameter
   * == true </code>.
   *
   * If <code> m_tuneInterpolationParameter </code> and <code>
   * m_interpolationParameterValid </code> are both <code> true </code>,
   * then <code> m_s </code> should always be between
   * <code> m_sLower </code> and <code> m_sUpper </code>.
   */
  private boolean m_interpolationParameterValid = false;

  /**
   * Flag to switch between balanced and unbalanced OSDL.
   * <code> true </code> means that one chooses balanced OSDL
   * (default: <code> false </code>).
   */
  private boolean m_balanced = false;

  /**
   * Flag to choose the weighted variant of the OSDL algorithm
   * (default: <code> false </code>).
   */
  private boolean m_weighted = false;

  /**
   * Coordinates representing the smallest element of the data space.
   */
  private Coordinates smallestElement;

  /**
   * Coordinates representing the biggest element of the data space.
   */
  private Coordinates biggestElement;

  /**
   * Returns a string describing the classifier.
* @return a description suitable for displaying in the    * explorer/experimenter gui   */  public String globalInfo() {    return "This class is an implementation of the Ordinal Stochastic "    + "Dominance Learner.\n"     + "Further information regarding the OSDL-algorithm can be found in:\n\n"    + getTechnicalInformation().toString() + "\n\n"    + "For more information about supervised ranking, see\n\n"    + "http://users.ugent.be/~slievens/supervised_ranking.php";  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation result;    TechnicalInformation additional;    result = new TechnicalInformation(Type.ARTICLE);    result.setValue(Field.AUTHOR, "S. Lievens and B. De Baets and K. Cao-Van");    result.setValue(Field.YEAR, "2006");    result.setValue(Field.TITLE, "A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting");    result.setValue(Field.JOURNAL, "Annals of Operations Research");    additional = result.add(Type.PHDTHESIS);    additional.setValue(Field.AUTHOR, "Kim Cao-Van");    additional.setValue(Field.YEAR, "2003");    additional.setValue(Field.TITLE, "Supervised ranking: from semantics to algorithms");    additional.setValue(Field.SCHOOL, "Ghent University");    additional = result.add(Type.MASTERSTHESIS);    additional.setValue(Field.AUTHOR, "Stijn Lievens");    additional.setValue(Field.YEAR, "2004");    additional.setValue(Field.TITLE, "Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken");    additional.setValue(Field.SCHOOL, "Ghent University");    return result;  }  /**   * Returns default capabilities of the classifier.   
*   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    // instances    result.setMinimumNumberInstances(0);    return result;  }  /**   * Classifies a given instance using the current settings    * of the classifier.   *   * @param instance the instance to be classified   * @throws Exception if for some reason no distribution   *         could be predicted   * @return the classification for the instance.  Depending on the   * settings of the classifier this is a double representing    * a classlabel (internal WEKA format) or a real value in the sense   * of regression.   */  public double classifyInstance(Instance instance)    throws Exception {         try {      return classifyInstance(instance, m_s, m_ctype);    } catch (IllegalArgumentException e) {      throw new AssertionError(e);    }  }  /**    * Classifies a given instance using the settings in the paramater   * list.  This doesn't change the internal settings of the classifier.   * In particular the interpolationparameter <code> m_s </code>   * and the classification type <code> m_ctype </code> are not changed.   *   * @param instance the instance to be classified   * @param s the value of the interpolationparameter to be used   * @param ctype the classification type to be used     * @throws IllegalStateException for some reason no distribution   *         could be predicted   * @throws IllegalArgumentException if the interpolation parameter or the   *         classification type is not valid    * @return the label assigned to the instance.  It is given in internal floating point format.   
*/  private double classifyInstance(Instance instance, double s, int ctype)     throws IllegalArgumentException, IllegalStateException {        if (s < 0 || s > 1) {      throw new IllegalArgumentException("Interpolation parameter is not valid " + s);    }    DiscreteDistribution dist = null;    if (!m_balanced) {      dist = distributionForInstance(instance, s);    } else {      dist = distributionForInstanceBalanced(instance, s);    }    if (dist == null) {      throw new IllegalStateException("Null distribution predicted");    }    double value = 0;    switch(ctype) {      case CT_REGRESSION:      case CT_WEIGHTED_SUM:	value = dist.mean();	if (ctype == CT_WEIGHTED_SUM) {	  value = Math.round(value);	}	break;      case CT_MAXPROB:	value = dist.modes()[0];	break;      case CT_MEDIAN:      case CT_MEDIAN_REAL:	value = dist.median();	if (ctype == CT_MEDIAN) {	  value = Math.round(value);	}	break;      default:	throw new IllegalArgumentException("Not a valid classification type!");     }    return value;  }  /**   * Calculates the class probabilities for the given test instance.   * Uses the current settings of the parameters if these are valid.   * If necessary it updates the interpolationparameter first, and hence    * this may change the classifier.   *   * @param instance the instance to be classified

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -