📄 instances.java
字号:
} if ((i == 0) || (current.value(attIndex) > prev)) { prev = current.value(attIndex); counter++; } } return counter; } else { return attribute(attIndex).numValues(); } } /** * Returns the number of distinct values of a given attribute. * Returns the number of instances if the attribute is a * string attribute. The value 'missing' is not counted. * * @param att the attribute * @return the number of distinct values of a given attribute */ public /*@pure@*/ int numDistinctValues(/*@non_null@*/Attribute att) { return numDistinctValues(att.index()); } /** * Returns the number of instances in the dataset. * * @return the number of instances in the dataset as an integer */ //@ ensures \result == m_Instances.size(); public /*@pure@*/ int numInstances() { return m_Instances.size(); } /** * Shuffles the instances in the set so that they are ordered * randomly. * * @param random a random number generator */ public void randomize(Random random) { for (int j = numInstances() - 1; j > 0; j--) swap(j, random.nextInt(j+1)); } /** * Reads a single instance from the reader and appends it * to the dataset. Automatically expands the dataset if it * is not large enough to hold the instance. This method does * not check for carriage return at the end of the line. * * @param reader the reader * @return false if end of file has been reached * @throws IOException if the information is not read * successfully * @deprecated instead of using this method in conjunction with the * <code>readInstance(Reader)</code> method, one should use the * <code>ArffLoader</code> or <code>DataSource</code> class instead. * @see weka.core.converters.ArffLoader * @see weka.core.converters.ConverterUtils.DataSource */ @Deprecated public boolean readInstance(Reader reader) throws IOException { ArffReader arff = new ArffReader(reader, this, m_Lines, 1); Instance inst = arff.readInstance(arff.getData(), false); m_Lines = arff.getLineNo(); if (inst != null) { add(inst); return true; } else { return false; } } /** * Returns the relation's name. * * @return the relation's name as a string */ //@ ensures \result == m_RelationName; public /*@pure@*/ String relationName() { return m_RelationName; } /** * Renames an attribute. This change only affects this * dataset. * * @param att the attribute's index (index starts with 0) * @param name the new name */ public void renameAttribute(int att, String name) { Attribute newAtt = attribute(att).copy(name); FastVector newVec = new FastVector(numAttributes()); for (int i = 0; i < numAttributes(); i++) { if (i == att) { newVec.addElement(newAtt); } else { newVec.addElement(attribute(i)); } } m_Attributes = newVec; } /** * Renames an attribute. This change only affects this * dataset. * * @param att the attribute * @param name the new name */ public void renameAttribute(Attribute att, String name) { renameAttribute(att.index(), name); } /** * Renames the value of a nominal (or string) attribute value. This * change only affects this dataset. * * @param att the attribute's index (index starts with 0) * @param val the value's index (index starts with 0) * @param name the new name */ public void renameAttributeValue(int att, int val, String name) { Attribute newAtt = (Attribute)attribute(att).copy(); FastVector newVec = new FastVector(numAttributes()); newAtt.setValue(val, name); for (int i = 0; i < numAttributes(); i++) { if (i == att) { newVec.addElement(newAtt); } else { newVec.addElement(attribute(i)); } } m_Attributes = newVec; } /** * Renames the value of a nominal (or string) attribute value. This * change only affects this dataset. * * @param att the attribute * @param val the value * @param name the new name */ public void renameAttributeValue(Attribute att, String val, String name) { int v = att.indexOfValue(val); if (v == -1) throw new IllegalArgumentException(val + " not found"); renameAttributeValue(att.index(), v, name); } /** * Creates a new dataset of the same size using random sampling * with replacement. * * @param random a random number generator * @return the new dataset */ public Instances resample(Random random) { Instances newData = new Instances(this, numInstances()); while (newData.numInstances() < numInstances()) { newData.add(instance(random.nextInt(numInstances()))); } return newData; } /** * Creates a new dataset of the same size using random sampling * with replacement according to the current instance weights. The * weights of the instances in the new dataset are set to one. * * @param random a random number generator * @return the new dataset */ public Instances resampleWithWeights(Random random) { double [] weights = new double[numInstances()]; for (int i = 0; i < weights.length; i++) { weights[i] = instance(i).weight(); } return resampleWithWeights(random, weights); } /** * Creates a new dataset of the same size using random sampling * with replacement according to the given weight vector. The * weights of the instances in the new dataset are set to one. * The length of the weight vector has to be the same as the * number of instances in the dataset, and all weights have to * be positive. * * @param random a random number generator * @param weights the weight vector * @return the new dataset * @throws IllegalArgumentException if the weights array is of the wrong * length or contains negative weights. */ public Instances resampleWithWeights(Random random, double[] weights) { if (weights.length != numInstances()) { throw new IllegalArgumentException("weights.length != numInstances."); } Instances newData = new Instances(this, numInstances()); if (numInstances() == 0) { return newData; } double[] probabilities = new double[numInstances()]; double sumProbs = 0, sumOfWeights = Utils.sum(weights); for (int i = 0; i < numInstances(); i++) { sumProbs += random.nextDouble(); probabilities[i] = sumProbs; } Utils.normalize(probabilities, sumProbs / sumOfWeights); // Make sure that rounding errors don't mess things up probabilities[numInstances() - 1] = sumOfWeights; int k = 0; int l = 0; sumProbs = 0; while ((k < numInstances() && (l < numInstances()))) { if (weights[l] < 0) { throw new IllegalArgumentException("Weights have to be positive."); } sumProbs += weights[l]; while ((k < numInstances()) && (probabilities[k] <= sumProbs)) { newData.add(instance(l)); newData.instance(k).setWeight(1); k++; } l++; } return newData; } /** * Sets the class attribute. * * @param att attribute to be the class */ public void setClass(Attribute att) { m_ClassIndex = att.index(); } /** * Sets the class index of the set. * If the class index is negative there is assumed to be no class. * (ie. it is undefined) * * @param classIndex the new class index (index starts with 0) * @throws IllegalArgumentException if the class index is too big or < 0 */ public void setClassIndex(int classIndex) { if (classIndex >= numAttributes()) { throw new IllegalArgumentException("Invalid class index: " + classIndex); } m_ClassIndex = classIndex; } /** * Sets the relation's name. * * @param newName the new relation name. */ public void setRelationName(/*@non_null@*/String newName) { m_RelationName = newName; } /** * Sorts the instances based on an attribute. For numeric attributes, * instances are sorted in ascending order. For nominal attributes, * instances are sorted based on the attribute label ordering * specified in the header. Instances with missing values for the * attribute are placed at the end of the dataset. * * @param attIndex the attribute's index (index starts with 0) */ public void sort(int attIndex) { int i,j; // move all instances with missing values to end j = numInstances() - 1; i = 0; while (i <= j) { if (instance(j).isMissing(attIndex)) { j--; } else { if (instance(i).isMissing(attIndex)) { swap(i,j); j--; } i++; } } quickSort(attIndex, 0, j); } /** * Sorts the instances based on an attribute. For numeric attributes, * instances are sorted into ascending order. For nominal attributes, * instances are sorted based on the attribute label ordering * specified in the header. Instances with missing values for the * attribute are placed at the end of the dataset. * * @param att the attribute */ public void sort(Attribute att) { sort(att.index()); } /** * Stratifies a set of instances according to its class values * if the class attribute is nominal (so that afterwards a * stratified cross-validation can be performed). * * @param numFolds the number of folds in the cross-validation * @throws UnassignedClassException if the class is not set */ public void stratify(int numFolds) { if (numFolds <= 0) { throw new IllegalArgumentException("Number of folds must be greater than 1"); } if (m_ClassIndex < 0) { throw new UnassignedClassException("Class index is negative (not set)!"); } if (classAttribute().isNominal()) { // sort by class int index = 1; while (index < numInstances()) { Instance instance1 = instance(index - 1); for (int j = index; j < numInstances(); j++) { Instance instance2 = instance(j); if ((instance1.classValue() == instance2.classValue()) || (instance1.classIsMissing() && instance2.classIsMissing())) { swap(index,j); index++; } } index++; } stratStep(numFolds); } } /** * Computes the sum of all the instances' weights. * * @return the sum of all the instances' weights as a double */ public /*@pure@*/ double sumOfWeights() { double sum = 0; for (int i = 0; i < numInstances(); i++) { sum += instance(i).weight(); } return sum; } /** * Creates the test set for one fold of a cross-validation on * the dataset. * * @param numFolds the number of folds in the cross-validation. Must * be greater than 1. * @param numFold 0 for the first fold, 1 for the second, ... * @return the test set as a set of weighted instances * @throws IllegalArgumentException if the number of folds is less than 2 * or greater than the number of instances. */ //@ requires 2 <= numFolds && numFolds < numInstances(); //@ requires 0 <= numFold && numFold < numFolds; public Instances testCV(int numFolds, int numFold) { int numInstForFold, first, offset; Instances test; if (numFolds < 2) { throw new IllegalArgumentException("Number of folds must be at least 2!"); } if (numFolds > numInstances()) { throw new IllegalArgumentException("Can't have more folds than instances!"); } numInstForFold = numInstances() / numFolds; if (numFold < numInstances() % numFolds){ numInstForFold++; offset = numFold; }else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -