⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancesutil.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    InstancesUtil.java *    Copyright (C) 2004 Stijn Lievens * */package weka.classifiers.misc.monotone;import weka.classifiers.Classifier;import weka.core.Instance;import weka.core.Instances;import weka.core.Utils;import weka.estimators.DiscreteEstimator;import java.io.BufferedWriter;import java.io.IOException;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Random;/** * This class contains some methods for working with objects of  * type <code> Instance </code> and <code> Instances, </code> not * provided by there respective classes. * <p> * This implementation is part of the master's thesis: "Studie * en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd * rangschikken", Stijn Lievens, Ghent University, 2004.  * </p> *  * @author Stijn Lievens (stijn.lievens@ugent.be) * @version $Revision: 1.1 $ */public class InstancesUtil {  /**   * Compares two instances, ignoring the class attribute (if any)   *   * @param i1 the first instance   * @param i2 the second instance   * @return true if both instances are equal (ignoring the class   * attribute), false otherwise   */  public static boolean equalIgnoreClass(Instance i1, Instance i2) {    int n = i1.numAttributes();    int classIndex = i1.classIndex();    if (i2.numAttributes() != n || classIndex != i2.classIndex()) {      return false;    }    int i = 0;    while(i < n && (i == classIndex 	|| Utils.eq(i1.value(i), i2.value(i)))) {      i++;    }    return i == n;  }  /**   * Get the index of an instance in a set of instances, where    * instances are compared ignoring the class attribute.   *   * @param instances the set of instances   * @param instance to instance to be found in the given set of instances   * @return the index of the first instance that equals the given instance   * (ignoring the class attribute), -1 if the instance was not found   */  public static int containsIgnoreClass(Instances instances, Instance instance) {    double[] dd = instance.toDoubleArray();    int classIndex = instances.classIndex();    int n = instances.numAttributes();    Iterator it =       new EnumerationIterator(instances.enumerateInstances());    int index = 0;    while(it.hasNext()) {      Instance tmp = (Instance) it.next();      int i = 0;      while(i < n && 	  (i == classIndex || Utils.eq(dd[i], tmp.value(i)))) {	i++;      }      if (i == n) { 	return index;  // found it      }      index++;    }    return -1;  }  /**   * Find the next occurence of an instance, ignoring the class,   * for which the index in the dataset is at least <code> index. </code>   *   * @param instances the set of instances to be searched   * @param instance the instance to be found   * @param index the minimum index that might be returned   * @return the index of the first instance with index at    * least <code> index </code> that equals the given instance   * (ignoring the class attribute), -1 if the instance was not found   */  public static int nextOccurenceIgnoreClass(Instances instances, Instance instance, int index) {    double[] dd = instance.toDoubleArray();    int classIndex = instances.classIndex();    int n = instances.numAttributes();    int numInstances = instances.numInstances();    int currentIndex = index;    while(currentIndex < numInstances) {      Instance tmp = instances.instance(currentIndex);      int i = 0;      while(i < n && 	  (i == classIndex || Utils.eq(dd[i], tmp.value(i)))) {	i++;      }      if (i == n) { 	return currentIndex;  // found it      }      currentIndex++;    }    return -1; // not present  }  /**   * Check if all instances have the same class value.   *   * @param instances the instances to be checked for homogeneity   * @return true if the instances have the same class value, false otherwise    */  public static boolean isHomogeneous(Instances instances) {    Iterator it = new EnumerationIterator(instances.enumerateInstances());    if (it.hasNext()) {      double classValue = ((Instance) it.next()).classValue();      while(it.hasNext()) {	if (((Instance) it.next()).classValue() != classValue) {	  return false;	}      }    }    return true; // empty or all identical  }  /**   * Compares two instances in the data space, this is ignoring the class   * attribute.  An instance is strictly smaller than another instance   * if the same holds for the <code> Coordinates </code> based on   * these instances.   *   * @param i1 the first instance   * @param i2 the second instance   * @return <code> true </code> if the first instance is strictly smaller   * than the second instance, <code> false </code> otherwise   */  public static boolean strictlySmaller(Instance i1, Instance i2) {    // XXX implementation can be done faster    Coordinates c1 = new Coordinates(i1);    Coordinates c2 = new Coordinates(i2);    return c1.strictlySmaller(c2);  }  /**   * Compares two instances in the data space, this is, ignoring the class   * attribute.  An instance is smaller or equal than another instance   * if the same holds for the <code> Coordinates </code> based on   * these instances.   *   * @param i1 the first instance   * @param i2 the second instance   * @return <code> true </code> if the first instance is smaller or equal   * than the second instance, <code> false </code> otherwise   */  public static boolean smallerOrEqual(Instance i1,Instance i2) {    // XXX implementation can be done faster    Coordinates c1 = new Coordinates(i1);    Coordinates c2 = new Coordinates(i2);    return c1.smallerOrEqual(c2);  }  /**   * Checks if two instances are comparable in the data space, this is    * ignoring the class attribute.  Two instances are comparable if the   * first is smaller or equal than the second, or the other way around.   *   * @param i1 the first instance   * @param i2 the second instance   * @return <code> true </code> if the given instances are comparable,   * <code> false </code> otherwise   * @throws IllegalArgumentException if the two instances don't have the   * same length   */  public static boolean comparable(Instance i1, Instance i2) throws IllegalArgumentException {    // XXX maybe we should think about using 'equalHeaders' of Instance    // to obtain a fool proof implementation    Coordinates c1 = new Coordinates(i1);    Coordinates c2 = new Coordinates(i2);    return c1.smallerOrEqual(c2) || c2.smallerOrEqual(c1);  }  /**   * Checks it two instances give rise to doubt.  There is doubt between   * two instances if their <code> Coordinates </code> are equal, but    * their class value is different.   *   * @param i1 the first instance   * @param i2 the second instance   * @return <code> true </code> if there is doubt between the two    * given instances, <code> false </code> otherwise   */  public static boolean doubt(Instance i1, Instance i2) {    // XXX use equalHeaders ?    if (i1.classValue() == i2.classValue()) {      return false;    }    Coordinates c1 = new Coordinates(i1);    Coordinates c2 = new Coordinates(i2);    return c1.equals(c2);   }  /**    *  Checks if two instances give rise to reversed preference.     *  Two instances give rise to reversed preference in the data space,   *  if their <code> Coordinates </code> are comparable but different,   *  and their class values are not related in the same way.   *     *  @param i1 the first instance   *  @param i2 the second instance   *  @return <code> true </code> if <code> i1 </code> and <code> i2 </code>   *  give rise to reversed preference, <code> false </code> otherwise   *  @throws IllegalArgumentException if the two instances don't have    *  the same length   */  public static boolean reversedPreference(Instance i1, Instance i2) throws IllegalArgumentException {    // XXX should the implementation be made fool proof by use of     // 'equalHeaders'?  It can also be speeded up I think.    if (i1.classValue() == i2.classValue()) {      return false;    }    Coordinates c1 = new Coordinates(i1);    Coordinates c2 = new Coordinates(i2);    if (i1.classValue() > i2.classValue() && c1.strictlySmaller(c2)) {      return true;    }    if (i2.classValue() > i1.classValue() && c2.strictlySmaller(c1)) {      return true;    }    return false;  }  /**    *  Checks if the given data set is monotone.  We say that a data set   *  is monotone if it contains doubt nor reversed preferences.   *   *  @param instances the data set to be checked   *  @return <code> true </code> if the given data set if monotone,   *  <code> false </code> otherwise   */    public static boolean isMonotone(Instances instances) {    int n = instances.numInstances();    for (int i = 0; i < n; i++) {      Instance i1 = instances.instance(i);      for (int j = i + 1; j < n; j++) {	if ( doubt(i1, instances.instance(j)) ||	    reversedPreference(i1, instances.instance(j))) {	  return false;	}      }    }    return true;  }  /**   * Test if a set of instances is quasi monotone.  We say that a set   * of instances <code> S </code> is quasi monotone with respect to    * a set of instances <code> D </code> iff    * <code> [x,y] \cap D \neq \emptyset \implies class(x) \leq class(y).   * </code>  This implies that <code> D </code> itself is monotone.   *   * @param ground the instances playing the role of <code> D </code>   * @param other the instances playing the role of <code> S </code>   * @return true if the instances are quasi monotone, false otherwise   */  public static boolean isQuasiMonotone(Instances ground, Instances other) {    if (!isMonotone(ground)) {      return false;    }    Iterator it1 = new EnumerationIterator(ground.enumerateInstances());    while(it1.hasNext()) {      Instance inst1 = (Instance) it1.next();      Iterator it2 = new EnumerationIterator(other.enumerateInstances());      while(it2.hasNext()) {	Instance inst2 = (Instance) it2.next();	if (doubt(inst1, inst2) || reversedPreference(inst1, inst2)) {	  return false;	}      }    }    return true;  }  /**    * Gather some statistics regarding reversed preferences.    *    * @param instances the instances to be examined   * @return array of length 3; position 0 indicates the number of    *  couples that have reversed preference, position 1 the number of    *  couples that are comparable, and position 2 the total   *  number of couples   * @see #reversedPreference(Instance, Instance)   */    public static int[] nrOfReversedPreferences(Instances instances) {    int[] stats = new int[3];    int n = instances.numInstances();    stats[0] = 0;    stats[1] = 0;    // number of couples    stats[2] = n * (n - 1) / 2;     for (int i = 0; i < n; i++) {      Instance i1 = instances.instance(i);      for (int j = i + 1; j < n; j++) {	Instance j1 = instances.instance(j);	if (comparable(i1, j1)) {	  stats[1]++;  // comparable	  if (reversedPreference(i1, j1)) {	    stats[0]++; // reversed preference	  }	}      }    }    return stats;  }  /**   * Find the number of stochastic reversed preferences in the dataset.   *   * @param instances the instances to be examined   * @return an array of integers containing at position   * <ul>   * <li> 0: number of different coordinates, this is the size of S_X </li>    * <li> 1: number of couples showing reversed preference:<br>    * <code> x &lt; y </code> and   *        <code> not (F_x leqstoch F_y) </code> </li>   * <li> 2: number of couples having<br>   *   <code> x &lt; y </code> and <code> F_y leqstoch F_x </code>   *                                  and <code> F_x neq F_y </code> </li>   * <li> 3: number of couples that are comparable <br>   *          <code> |\{ (x,y)\in S_X \times S_x | x &lt; y\}| </code> </li>   * <li> 4: number of couples in S_X </li>   * </ul>   * @throws IllegalArgumentException if there are no instances with   * a non-missing class value, or if the class is not set   */  public static int[] nrStochasticReversedPreference(Instances instances)    throws IllegalArgumentException {    if (instances.classIndex() < 0) {      throw new IllegalArgumentException("Class is not set");    }    // copy the dataset     Instances data = new Instances(instances);    // new dataset where examples with missing class value are removed    data.deleteWithMissingClass();    if (data.numInstances() == 0) {      throw new IllegalArgumentException      ("No instances with a class value!");    }    // build the Map for the estimatedDistributions     Map distributions = new HashMap(data.numInstances()/2);    // cycle through all instances     Iterator i =       new EnumerationIterator(instances.enumerateInstances());    while (i.hasNext()) {       Instance instance = (Instance) i.next();      Coordinates c = new Coordinates(instance);      // get DiscreteEstimator from the map      DiscreteEstimator df = 	(DiscreteEstimator) distributions.get(c);      // if no DiscreteEstimator is present in the map, create one       if (df == null) {	df = new DiscreteEstimator(instances.numClasses(), 0);      }      df.addValue(instance.classValue(),instance.weight()); // update      distributions.put(c,df); // put back in map    }    // build the map of cumulative distribution functions     Map cumulativeDistributions =       new HashMap(distributions.size());    // Cycle trough the map of discrete distributions, and create a new    // one containing cumulative discrete distributions    for (Iterator it=distributions.keySet().iterator();    it.hasNext();) {      Coordinates c = (Coordinates) it.next();      DiscreteEstimator df = 	(DiscreteEstimator) distributions.get(c);      cumulativeDistributions.put      (c, new CumulativeDiscreteDistribution(df));    }    int[] revPref = new int[5];     revPref[0] = cumulativeDistributions.size();    Iterator it = cumulativeDistributions.keySet().iterator();    while (it.hasNext()) {      Coordinates c1 = (Coordinates) it.next();      CumulativeDiscreteDistribution cdf1 = 	(CumulativeDiscreteDistribution) 	cumulativeDistributions.get(c1);      Iterator it2 = cumulativeDistributions.keySet().iterator();      while(it2.hasNext()) {	Coordinates c2 = (Coordinates) it2.next();	CumulativeDiscreteDistribution cdf2 = 	  (CumulativeDiscreteDistribution)	  cumulativeDistributions.get(c2);	if (c2.equals(c1)) {	  continue;	}	revPref[4]++;	if (c1.strictlySmaller(c2) == true) {	  revPref[3]++; //vergelijkbaar	  if (cdf1.stochasticDominatedBy(cdf2) == false ) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -