📄 instancesutil.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * InstancesUtil.java * Copyright (C) 2004 Stijn Lievens * */package weka.classifiers.misc.monotone;import weka.classifiers.Classifier;import weka.core.Instance;import weka.core.Instances;import weka.core.Utils;import weka.estimators.DiscreteEstimator;import java.io.BufferedWriter;import java.io.IOException;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Random;/** * This class contains some methods for working with objects of * type <code> Instance </code> and <code> Instances, </code> not * provided by there respective classes. * <p> * This implementation is part of the master's thesis: "Studie * en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd * rangschikken", Stijn Lievens, Ghent University, 2004. * </p> * * @author Stijn Lievens (stijn.lievens@ugent.be) * @version $Revision: 1.1 $ */public class InstancesUtil { /** * Compares two instances, ignoring the class attribute (if any) * * @param i1 the first instance * @param i2 the second instance * @return true if both instances are equal (ignoring the class * attribute), false otherwise */ public static boolean equalIgnoreClass(Instance i1, Instance i2) { int n = i1.numAttributes(); int classIndex = i1.classIndex(); if (i2.numAttributes() != n || classIndex != i2.classIndex()) { return false; } int i = 0; while(i < n && (i == classIndex || Utils.eq(i1.value(i), i2.value(i)))) { i++; } return i == n; } /** * Get the index of an instance in a set of instances, where * instances are compared ignoring the class attribute. * * @param instances the set of instances * @param instance to instance to be found in the given set of instances * @return the index of the first instance that equals the given instance * (ignoring the class attribute), -1 if the instance was not found */ public static int containsIgnoreClass(Instances instances, Instance instance) { double[] dd = instance.toDoubleArray(); int classIndex = instances.classIndex(); int n = instances.numAttributes(); Iterator it = new EnumerationIterator(instances.enumerateInstances()); int index = 0; while(it.hasNext()) { Instance tmp = (Instance) it.next(); int i = 0; while(i < n && (i == classIndex || Utils.eq(dd[i], tmp.value(i)))) { i++; } if (i == n) { return index; // found it } index++; } return -1; } /** * Find the next occurence of an instance, ignoring the class, * for which the index in the dataset is at least <code> index. </code> * * @param instances the set of instances to be searched * @param instance the instance to be found * @param index the minimum index that might be returned * @return the index of the first instance with index at * least <code> index </code> that equals the given instance * (ignoring the class attribute), -1 if the instance was not found */ public static int nextOccurenceIgnoreClass(Instances instances, Instance instance, int index) { double[] dd = instance.toDoubleArray(); int classIndex = instances.classIndex(); int n = instances.numAttributes(); int numInstances = instances.numInstances(); int currentIndex = index; while(currentIndex < numInstances) { Instance tmp = instances.instance(currentIndex); int i = 0; while(i < n && (i == classIndex || Utils.eq(dd[i], tmp.value(i)))) { i++; } if (i == n) { return currentIndex; // found it } currentIndex++; } return -1; // not present } /** * Check if all instances have the same class value. * * @param instances the instances to be checked for homogeneity * @return true if the instances have the same class value, false otherwise */ public static boolean isHomogeneous(Instances instances) { Iterator it = new EnumerationIterator(instances.enumerateInstances()); if (it.hasNext()) { double classValue = ((Instance) it.next()).classValue(); while(it.hasNext()) { if (((Instance) it.next()).classValue() != classValue) { return false; } } } return true; // empty or all identical } /** * Compares two instances in the data space, this is ignoring the class * attribute. An instance is strictly smaller than another instance * if the same holds for the <code> Coordinates </code> based on * these instances. * * @param i1 the first instance * @param i2 the second instance * @return <code> true </code> if the first instance is strictly smaller * than the second instance, <code> false </code> otherwise */ public static boolean strictlySmaller(Instance i1, Instance i2) { // XXX implementation can be done faster Coordinates c1 = new Coordinates(i1); Coordinates c2 = new Coordinates(i2); return c1.strictlySmaller(c2); } /** * Compares two instances in the data space, this is, ignoring the class * attribute. An instance is smaller or equal than another instance * if the same holds for the <code> Coordinates </code> based on * these instances. * * @param i1 the first instance * @param i2 the second instance * @return <code> true </code> if the first instance is smaller or equal * than the second instance, <code> false </code> otherwise */ public static boolean smallerOrEqual(Instance i1,Instance i2) { // XXX implementation can be done faster Coordinates c1 = new Coordinates(i1); Coordinates c2 = new Coordinates(i2); return c1.smallerOrEqual(c2); } /** * Checks if two instances are comparable in the data space, this is * ignoring the class attribute. Two instances are comparable if the * first is smaller or equal than the second, or the other way around. * * @param i1 the first instance * @param i2 the second instance * @return <code> true </code> if the given instances are comparable, * <code> false </code> otherwise * @throws IllegalArgumentException if the two instances don't have the * same length */ public static boolean comparable(Instance i1, Instance i2) throws IllegalArgumentException { // XXX maybe we should think about using 'equalHeaders' of Instance // to obtain a fool proof implementation Coordinates c1 = new Coordinates(i1); Coordinates c2 = new Coordinates(i2); return c1.smallerOrEqual(c2) || c2.smallerOrEqual(c1); } /** * Checks it two instances give rise to doubt. There is doubt between * two instances if their <code> Coordinates </code> are equal, but * their class value is different. * * @param i1 the first instance * @param i2 the second instance * @return <code> true </code> if there is doubt between the two * given instances, <code> false </code> otherwise */ public static boolean doubt(Instance i1, Instance i2) { // XXX use equalHeaders ? if (i1.classValue() == i2.classValue()) { return false; } Coordinates c1 = new Coordinates(i1); Coordinates c2 = new Coordinates(i2); return c1.equals(c2); } /** * Checks if two instances give rise to reversed preference. * Two instances give rise to reversed preference in the data space, * if their <code> Coordinates </code> are comparable but different, * and their class values are not related in the same way. * * @param i1 the first instance * @param i2 the second instance * @return <code> true </code> if <code> i1 </code> and <code> i2 </code> * give rise to reversed preference, <code> false </code> otherwise * @throws IllegalArgumentException if the two instances don't have * the same length */ public static boolean reversedPreference(Instance i1, Instance i2) throws IllegalArgumentException { // XXX should the implementation be made fool proof by use of // 'equalHeaders'? It can also be speeded up I think. if (i1.classValue() == i2.classValue()) { return false; } Coordinates c1 = new Coordinates(i1); Coordinates c2 = new Coordinates(i2); if (i1.classValue() > i2.classValue() && c1.strictlySmaller(c2)) { return true; } if (i2.classValue() > i1.classValue() && c2.strictlySmaller(c1)) { return true; } return false; } /** * Checks if the given data set is monotone. We say that a data set * is monotone if it contains doubt nor reversed preferences. * * @param instances the data set to be checked * @return <code> true </code> if the given data set if monotone, * <code> false </code> otherwise */ public static boolean isMonotone(Instances instances) { int n = instances.numInstances(); for (int i = 0; i < n; i++) { Instance i1 = instances.instance(i); for (int j = i + 1; j < n; j++) { if ( doubt(i1, instances.instance(j)) || reversedPreference(i1, instances.instance(j))) { return false; } } } return true; } /** * Test if a set of instances is quasi monotone. We say that a set * of instances <code> S </code> is quasi monotone with respect to * a set of instances <code> D </code> iff * <code> [x,y] \cap D \neq \emptyset \implies class(x) \leq class(y). * </code> This implies that <code> D </code> itself is monotone. * * @param ground the instances playing the role of <code> D </code> * @param other the instances playing the role of <code> S </code> * @return true if the instances are quasi monotone, false otherwise */ public static boolean isQuasiMonotone(Instances ground, Instances other) { if (!isMonotone(ground)) { return false; } Iterator it1 = new EnumerationIterator(ground.enumerateInstances()); while(it1.hasNext()) { Instance inst1 = (Instance) it1.next(); Iterator it2 = new EnumerationIterator(other.enumerateInstances()); while(it2.hasNext()) { Instance inst2 = (Instance) it2.next(); if (doubt(inst1, inst2) || reversedPreference(inst1, inst2)) { return false; } } } return true; } /** * Gather some statistics regarding reversed preferences. * * @param instances the instances to be examined * @return array of length 3; position 0 indicates the number of * couples that have reversed preference, position 1 the number of * couples that are comparable, and position 2 the total * number of couples * @see #reversedPreference(Instance, Instance) */ public static int[] nrOfReversedPreferences(Instances instances) { int[] stats = new int[3]; int n = instances.numInstances(); stats[0] = 0; stats[1] = 0; // number of couples stats[2] = n * (n - 1) / 2; for (int i = 0; i < n; i++) { Instance i1 = instances.instance(i); for (int j = i + 1; j < n; j++) { Instance j1 = instances.instance(j); if (comparable(i1, j1)) { stats[1]++; // comparable if (reversedPreference(i1, j1)) { stats[0]++; // reversed preference } } } } return stats; } /** * Find the number of stochastic reversed preferences in the dataset. * * @param instances the instances to be examined * @return an array of integers containing at position * <ul> * <li> 0: number of different coordinates, this is the size of S_X </li> * <li> 1: number of couples showing reversed preference:<br> * <code> x < y </code> and * <code> not (F_x leqstoch F_y) </code> </li> * <li> 2: number of couples having<br> * <code> x < y </code> and <code> F_y leqstoch F_x </code> * and <code> F_x neq F_y </code> </li> * <li> 3: number of couples that are comparable <br> * <code> |\{ (x,y)\in S_X \times S_x | x < y\}| </code> </li> * <li> 4: number of couples in S_X </li> * </ul> * @throws IllegalArgumentException if there are no instances with * a non-missing class value, or if the class is not set */ public static int[] nrStochasticReversedPreference(Instances instances) throws IllegalArgumentException { if (instances.classIndex() < 0) { throw new IllegalArgumentException("Class is not set"); } // copy the dataset Instances data = new Instances(instances); // new dataset where examples with missing class value are removed data.deleteWithMissingClass(); if (data.numInstances() == 0) { throw new IllegalArgumentException ("No instances with a class value!"); } // build the Map for the estimatedDistributions Map distributions = new HashMap(data.numInstances()/2); // cycle through all instances Iterator i = new EnumerationIterator(instances.enumerateInstances()); while (i.hasNext()) { Instance instance = (Instance) i.next(); Coordinates c = new Coordinates(instance); // get DiscreteEstimator from the map DiscreteEstimator df = (DiscreteEstimator) distributions.get(c); // if no DiscreteEstimator is present in the map, create one if (df == null) { df = new DiscreteEstimator(instances.numClasses(), 0); } df.addValue(instance.classValue(),instance.weight()); // update distributions.put(c,df); // put back in map } // build the map of cumulative distribution functions Map cumulativeDistributions = new HashMap(distributions.size()); // Cycle trough the map of discrete distributions, and create a new // one containing cumulative discrete distributions for (Iterator it=distributions.keySet().iterator(); it.hasNext();) { Coordinates c = (Coordinates) it.next(); DiscreteEstimator df = (DiscreteEstimator) distributions.get(c); cumulativeDistributions.put (c, new CumulativeDiscreteDistribution(df)); } int[] revPref = new int[5]; revPref[0] = cumulativeDistributions.size(); Iterator it = cumulativeDistributions.keySet().iterator(); while (it.hasNext()) { Coordinates c1 = (Coordinates) it.next(); CumulativeDiscreteDistribution cdf1 = (CumulativeDiscreteDistribution) cumulativeDistributions.get(c1); Iterator it2 = cumulativeDistributions.keySet().iterator(); while(it2.hasNext()) { Coordinates c2 = (Coordinates) it2.next(); CumulativeDiscreteDistribution cdf2 = (CumulativeDiscreteDistribution) cumulativeDistributions.get(c2); if (c2.equals(c1)) { continue; } revPref[4]++; if (c1.strictlySmaller(c2) == true) { revPref[3]++; //vergelijkbaar if (cdf1.stochasticDominatedBy(cdf2) == false ) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -