📄 instancesutil.java
字号:
revPref[1]++; if (cdf2.stochasticDominatedBy(cdf1) == true) { revPref[2]++; } } } } } revPref[4] /= 2; return revPref; } /** * Counts the number of redundant pairs in the sense of OLM. * Two instances are redundant if they are comparable and have the same * class value. * * @param instances the instances to be checked * @return the number of redundant pairs in the given set of instances */ public static int nrOfRedundant(Instances instances) { int n = instances.numInstances(); int nrRedundant = 0; for (int i = 0; i < n; i++) { Instance i1 = instances.instance(i); for (int j = i + 1; j < n; j++) { Instance j1 = instances.instance(j); if (j1.classValue() == i1.classValue() && comparable(i1, j1) ) { nrRedundant++; } } } return nrRedundant; } /** * Calulates the total loss over the <code> instances </code>, * using the trained <code> classifier </code> and the * specified <code> lossFunction. </code> The instances * should not contain missing values in the class attribute. * * @param classifier the trained classifier to use * @param instances the test instances * @param lossFunction the loss function to use * @return the total loss of all the instances using the given classifier and loss function */ public static double totalLoss(Classifier classifier, Instances instances, NominalLossFunction lossFunction) { double loss = 0; int n = instances.numInstances(); for (int i = 0; i < n; i++) { try { loss += lossFunction.loss(instances.instance(i).classValue(), classifier.classifyInstance(instances.instance(i))); } catch (Exception e) { // what should we do here ?? } } return loss; } /** * Classify a set of instances using a given classifier. The class value * of the instances are set. 
* * @param instances the instances to be classified * @param classifier a built classifier * @throws Exception if one of the instances could no be classified */ public static void classifyInstances(Instances instances, Classifier classifier) throws Exception { Iterator it = new EnumerationIterator(instances.enumerateInstances()); while(it.hasNext()) { Instance instance = (Instance) it.next(); instance.setClassValue(classifier.classifyInstance(instance)); } } /** * Calculates the relation (poset) formed by the instances. * * @param instances the instances for which the poset is to be formed * @return a <code> BooleanBitMatrix </code> for which position * <code> bm.get(i,j) == true </code> iff <code> * InstancesUtil.strictlySmaller(instances.instance(i), * instances.instance(j)) == true </code> */ public static BooleanBitMatrix getBitMatrix(Instances instances) { int numInstances = instances.numInstances(); BooleanBitMatrix bm = new BooleanBitMatrix(numInstances, numInstances); for (int i = 0; i < numInstances; i++ ) { Instance instance1 = instances.instance(i); for (int j = 0; j < numInstances; j++) { Instance instance2 = instances.instance(j); if (InstancesUtil.strictlySmaller(instance1, instance2)) { bm.set(i, j); // arc from instance1 to instance2 } } } return bm; } /** * Calculatus the number of elements in the closed interval * <code> [low,up]. </code> If the class index is set, then * the class attribute does not play part in the calculations, * this is we work in the data space. The code also works with * numeric attributes, but is primarily intended for ordinal attributes. * * @param low the lower bound of the interval * @param up the upper bound of the interval * @return the size of the interval (in floating point format) * @throws IllegalArgumentException if the given instances do not * constitute an interval. 
*/ public static double numberInInterval(Instance low, Instance up) throws IllegalArgumentException { Coordinates cLow = new Coordinates(low); Coordinates cUp = new Coordinates(up); if (cLow.smallerOrEqual(cUp) == false) { throw new IllegalArgumentException ("The given instances are not the bounds of an interval"); } double number = 1; int dim = cLow.dimension(); for (int i = 0; i < dim; i++) { number *= (cUp.getValue(i) - cLow.getValue(i) + 1); } return number; } /** * Calculatutes the number of vectors in the data space that are smaller * or equal than the given instance. * * @param instance the given instance * @return the number of vectors in the data space smaller or equal * than the given instance * @throws IllegalArgumentException if there are numeric attributes */ public static double numberOfSmallerVectors(Instance instance) throws IllegalArgumentException { double[] values = InstancesUtil.toDataDouble(instance); double nr = 1; for (int i = 0; i < values.length; i++) { if (instance.attribute(i).isNumeric()) { throw new IllegalArgumentException ("Numeric attributes are not supported"); } nr *= (values[i] + 1); } return nr; } /** * Calculatutes the number of vectors in the data space that are * greater or equal than the given instance. * * @param instance the given instance * @return the number of vectors in the data space greater of equal * than the given instance * @throws IllegalArgumentException if there are numeric attributes */ public static double numberOfGreaterVectors(Instance instance) throws IllegalArgumentException { double[] values = InstancesUtil.toDataDouble(instance); double nr = 1; for (int i = 0; i < values.length; i++) { if (instance.attribute(i).isNumeric()) { throw new IllegalArgumentException ("Numeric attributes are not supported"); } nr *= (instance.attribute(i).numValues() - values[i]); } return nr; } /** * Write the instances in ARFF-format to the indicated * <code> BufferedWriter </code>. 
* @param instances the instances to write * @param file the <code> BufferedWriter </code> to write to * @throws IOException if something goes wrong while writing the instances */ public static void write(Instances instances, BufferedWriter file) throws IOException{ file.write(instances.toString()); // XXX can probably be done better } /** * Return a histogram of the values for the specified attribute. * * @param instances the instances * @param attributeIndex the attribute to consider * @return a <code> DiscreteEstimator </code> where the <code>i</code>th * @throws IllegalArgumentException if the attribute at the specified * index is numeric */ public static DiscreteEstimator countValues(Instances instances, int attributeIndex) throws IllegalArgumentException{ int numValues = instances.attribute(attributeIndex).numValues(); if (numValues == 0) { throw new IllegalArgumentException ("Can't create histogram for numeric attribute"); } DiscreteEstimator de = new DiscreteEstimator(numValues, false); Iterator it = new EnumerationIterator(instances.enumerateInstances()); while (it.hasNext()) { Instance instance = (Instance) it.next(); if (!instance.isMissing(attributeIndex)) { de.addValue(instance.value(attributeIndex), instance.weight()); } } return de; } /** * Create, without replacement, a random subsample of the given size * from the given instances. 
* * @param instances the instances to sample from * @param size the requested size of the sample * @param random the random generator to use * @return a sample of the requested size, drawn from the given * instances without replacement * @throws IllegalArgumentException if the size exceeds the number * of instances */ public static Instances sampleWithoutReplacement( Instances instances, int size, Random random) { if (size > instances.numInstances()) { throw new IllegalArgumentException ("Size of requested sample exceeds number of instances"); } int numInstances = instances.numInstances(); int[] indices = new int[instances.numInstances()]; for (int i = 0; i < numInstances; i++) { indices[i] = i; } Instances sample = new Instances(instances, size); int index; for (int i = 0; i < size; i++) { index = random.nextInt(numInstances--); sample.add(instances.instance(indices[index])); swap(indices, index, numInstances); } return sample; } /** * Swaps two elements of the given array. * * @param aa the array * @param i the index of the first element * @param j the index of the second element */ final private static void swap(int[] aa, int i, int j) { int tmp = aa[i]; aa[i] = aa[j]; aa[j] = tmp; } /** * Generates a random sample of instances. Each attribute must be nominal, and the * class labels are not set. * * @param headerInfo Instances whose header information is used to determine how the * set of returned instances will look * @param numberOfExamples the desired size of the returned set * @param random the random number generator to use * @return a set of Instances containing the random sample. 
* @throws IllegalArgumentException if numeric attributes are given
   */
  public static Instances generateRandomSample(
      Instances headerInfo, int numberOfExamples, Random random)
      throws IllegalArgumentException {
    int numAttributes = headerInfo.numAttributes();
    double[] valueCounts = new double[numAttributes];
    int classIndex = headerInfo.classIndex();

    // Collect the number of values per attribute; a count of zero on a
    // non-class attribute is rejected as numeric.
    for (int att = 0; att < numAttributes; att++) {
      valueCounts[att] = headerInfo.attribute(att).numValues();
      boolean isClass = (att == classIndex);
      if (!isClass && valueCounts[att] == 0) {
        throw new IllegalArgumentException
          ("Numeric attributes are currently not supported");
      }
    }

    Instances sample = new Instances(headerInfo, numberOfExamples);
    String relationName =
        headerInfo.relationName() + ".random.sample.of." + numberOfExamples;
    sample.setRelationName(relationName);
    for (int drawn = 0; drawn < numberOfExamples; drawn++) {
      sample.add(randomSample(valueCounts, classIndex, random));
    }
    return sample;
  }

  /**
   * Generates a random instance. Every attribute other than the class
   * attribute receives a random value index; the entry at the class index
   * keeps the array default of 0.
   *
   * @param info array that gives for each attribute the number of possible values
   * @param classIndex the index of the class attribute
   * @param random the random number generator used
   * @return a random instance
   */
  private static Instance randomSample(double[] info,
      int classIndex, Random random) {
    double[] attValues = new double[info.length];
    for (int att = 0; att < attValues.length; att++) {
      if (att == classIndex) {
        continue;
      }
      int bound = (int) info[att];
      attValues[att] = random.nextInt(bound);
    }
    return new Instance(1, attValues);
  }

  /**
   * Returns an array containing the attribute values (in internal floating
   * point format) of the given instance in data space, that is, the class
   * attribute (if any) is removed.
* * @param instance the instance to get the attribute values from * @return array of doubles containing the attribute values */ public static double[] toDataDouble(Instance instance) { double[] vector = null; int classIndex = instance.classIndex(); if(classIndex >= 0) { vector = new double[instance.numAttributes() - 1]; } else { vector = new double[instance.numAttributes()]; } int index = 0; for (int i = 0; i < instance.numAttributes(); i++) { if(i != classIndex) { vector[index++] = instance.value(i); } } return vector; } /** * Computes the minimal extension for a given instance. * * @param instances the set of instances * @param instance the instance for which the minimal extension is to be * calculated * @return the value of the minimal extension, in internal floating point * format */ public static double minimalExtension(Instances instances, Instance instance) { return minimalExtension(instances, instance, 0); } /** * Computes the minimal extension of a given instance, but the * minimal value returned is <code> minValue. </code> This method * may have its applications when the training set is divided into * multiple Instances objects. * * @param instances the set of instances * @param instance the instance for which the minimal extension is to * be calculated * @param minValue a double indicating the minimal value that should * be returned * @return the label of the minimal extension, in internal floating point format */ public static double minimalExtension( Instances instances, Instance instance, double minValue) { double value = minValue; Iterator it = new EnumerationIterator(instances.enumerateInstances()); while(it.hasNext()) { Instance tmp = (Instance) it.next(); if (tmp.classValue() > value && InstancesUtil.smallerOrEqual(tmp, instance) ) { value = tmp.classValue(); } } return value; } /** * Computes the maximal extension for a given instance. 
*
   * @param instances the set of instances
   * @param instance the instance for which the maximal extension is to be
   * calculated
   * @return the value of the maximal extension, in internal floating point
   * format
   */
  public static double maximalExtension(Instances instances, Instance instance) {
    // the largest value that can be returned is numClasses() - 1
    double upperBound = instances.numClasses() - 1;
    return maximalExtension(instances, instance, upperBound);
  }

  /**
   * Computes the maximal extension of a given instance, but the
   * maximal value returned is <code> maxValue. </code> This method
   * may have its applications when the training set is divided into
   * multiple Instances objects.
   *
   * @param instances the set of instances
   * @param instance the instance for which the maximal extension is to
   * be calculated
   * @param maxValue a double indicating the maximal value that should
   * be returned
   * @return the value of the maximal extension, in internal floating point
   * format
   */
  public static double maximalExtension(
      Instances instances, Instance instance, double maxValue) {
    double best = maxValue;
    for (Iterator it = new EnumerationIterator(instances.enumerateInstances());
        it.hasNext(); ) {
      Instance candidate = (Instance) it.next();
      // keep the smallest class value among the instances that the given
      // instance is smaller than or equal to
      if (candidate.classValue() < best
          && InstancesUtil.smallerOrEqual(instance, candidate)) {
        best = candidate.classValue();
      }
    }
    return best;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -