📄 instances.java
字号:
/**
 * Implements computation of the kth-smallest element according
 * to Manber's "Introduction to Algorithms".
 * <p>
 * The subset is split with <code>partition(attIndex, left, right)</code>;
 * the search then continues recursively in whichever side must contain
 * the kth-smallest element.
 *
 * @param attIndex the attribute's index (index starts with 0)
 * @param left the first index of the subset (index starts with 0)
 * @param right the last index of the subset (index starts with 0)
 * @param k the value of k
 *
 * @return the index of the kth-smallest element
 */
//@ requires 0 <= attIndex && attIndex < numAttributes();
//@ requires 0 <= left && left <= right && right < numInstances();
protected int select(int attIndex, int left, int right, int k) {

  // A subset of one element: that element is the answer.
  if (left == right) {
    return left;
  }

  int middle = partition(attIndex, left, right);

  // Number of elements in the lower part [left, middle].
  int lowerSize = middle - left + 1;

  if (lowerSize >= k) {
    return select(attIndex, left, middle, k);
  }
  // The element lies in the upper part; re-base k relative to it.
  return select(attIndex, middle + 1, right, k - lowerSize);
}

/**
 * Help function needed for stratification of set.
 * <p>
 * Rebuilds the instance list by taking every <code>numFolds</code>-th
 * instance starting at offset 0, then every <code>numFolds</code>-th
 * instance starting at offset 1, and so on, until all instances have
 * been copied into the new list.
 * <p>
 * No validation of <code>numFolds</code> is performed here: the inner
 * loop advances by <code>numFolds</code>, so a value of zero never
 * advances and a negative value moves the index below zero. Callers
 * are expected to pass a positive value.
 *
 * @param numFolds the number of folds for the stratification
 */
protected void stratStep(int numFolds) {

  FastVector newVec = new FastVector(m_Instances.capacity());

  // create stratified batch
  for (int start = 0; newVec.size() < numInstances(); start++) {
    for (int j = start; j < numInstances(); j += numFolds) {
      newVec.addElement(instance(j));
    }
  }
  m_Instances = newVec;
}

/**
 * Swaps two instances in the set.
 *
 * @param i the first instance's index (index starts with 0)
 * @param j the second instance's index (index starts with 0)
 */
//@ requires 0 <= i && i < numInstances();
//@ requires 0 <= j && j < numInstances();
public void swap(int i, int j) {

  m_Instances.swap(i, j);
}

/**
 * Merges two sets of Instances together. The resulting set will have
 * all the attributes of the first set plus all the attributes of the
 * second set. The number of instances in both sets must be the same.
 * The relation name of the result is the two relation names joined
 * by an underscore.
 *
 * @param first the first set of Instances
 * @param second the second set of Instances
 * @return the merged set of Instances
 * @throws IllegalArgumentException if the datasets are not the same size
 */
public static Instances mergeInstances(Instances first, Instances second) {

  if (first.numInstances() != second.numInstances()) {
    throw new IllegalArgumentException("Instance sets must be of the same size");
  }

  // Create the vector of merged attributes
  FastVector newAttributes = new FastVector();
  for (int i = 0; i < first.numAttributes(); i++) {
    newAttributes.addElement(first.attribute(i));
  }
  for (int i = 0; i < second.numAttributes(); i++) {
    newAttributes.addElement(second.attribute(i));
  }

  // Create the set of Instances
  Instances merged = new Instances(first.relationName() + '_'
                                   + second.relationName(),
                                   newAttributes,
                                   first.numInstances());

  // Merge each instance
  for (int i = 0; i < first.numInstances(); i++) {
    merged.add(first.instance(i).mergeInstance(second.instance(i)));
  }
  return merged;
}

/**
 * Method for testing this class.
 * <p>
 * A small dataset is always built from scratch first. If a filename is
 * given, the dataset read from that file replaces it for the remaining
 * checks. Passing more than one argument raises a usage exception, which
 * (like every other exception in this method) is caught and printed as
 * a stack trace.
 *
 * @param argv may contain one element: the name of an ARFF file
 */
//@ requires argv != null;
//@ requires argv.length == 1;
//@ requires argv[0] != null;
public static void test(String [] argv) {

  Instances instances, secondInstances, train, test, empty;
  Random random = new Random(2);
  Reader reader;
  int start, num;
  FastVector testAtts, testVals;
  int i, j;

  try {
    if (argv.length > 1) {
      throw (new Exception("Usage: Instances [<filename>]"));
    }

    // Creating set of instances from scratch
    testVals = new FastVector(2);
    testVals.addElement("first_value");
    testVals.addElement("second_value");
    testAtts = new FastVector(2);
    testAtts.addElement(new Attribute("nominal_attribute", testVals));
    testAtts.addElement(new Attribute("numeric_attribute"));
    instances = new Instances("test_set", testAtts, 10);
    instances.add(new Instance(instances.numAttributes()));
    instances.add(new Instance(instances.numAttributes()));
    instances.add(new Instance(instances.numAttributes()));
    instances.setClassIndex(0);
    System.out.println("\nSet of instances created from scratch:\n");
    System.out.println(instances);

    if (argv.length == 1) {
      String filename = argv[0];

      // Read first five instances and print them.
      // The reader is closed in a finally block so the file handle is
      // released even if parsing fails part-way through.
      reader = new FileReader(filename);
      try {
        System.out.println("\nFirst five instances from file:\n");
        instances = new Instances(reader, 1);
        instances.setClassIndex(instances.numAttributes() - 1);
        i = 0;
        while ((i < 5) && (instances.readInstance(reader))) {
          i++;
        }
      } finally {
        reader.close();
      }
      System.out.println(instances);

      // Read all the instances in the file
      reader = new FileReader(filename);
      try {
        instances = new Instances(reader);
      } finally {
        reader.close();
      }

      // Make the last attribute be the class
      instances.setClassIndex(instances.numAttributes() - 1);

      // Print header and instances.
      System.out.println("\nDataset:\n");
      System.out.println(instances);
      System.out.println("\nClass index: "+instances.classIndex());
    }

    // Test basic methods based on class index.
    System.out.println("\nClass name: "+instances.classAttribute().name());
    System.out.println("\nClass index: "+instances.classIndex());
    System.out.println("\nClass is nominal: " +
                       instances.classAttribute().isNominal());
    System.out.println("\nClass is numeric: " +
                       instances.classAttribute().isNumeric());
    System.out.println("\nClasses:\n");
    for (i = 0; i < instances.numClasses(); i++) {
      System.out.println(instances.classAttribute().value(i));
    }
    System.out.println("\nClass values and labels of instances:\n");
    for (i = 0; i < instances.numInstances(); i++) {
      Instance inst = instances.instance(i);
      System.out.print(inst.classValue() + "\t");
      System.out.print(inst.toString(inst.classIndex()));
      if (inst.classIsMissing()) {
        System.out.println("\tis missing");
      } else {
        System.out.println();
      }
    }

    // Create random weights.
    System.out.println("\nCreating random weights for instances.");
    for (i = 0; i < instances.numInstances(); i++) {
      instances.instance(i).setWeight(random.nextDouble());
    }

    // Print all instances and their weights (and the sum of weights).
    System.out.println("\nInstances and their weights:\n");
    System.out.println(instances.instancesAndWeights());
    System.out.print("\nSum of weights: ");
    System.out.println(instances.sumOfWeights());

    // Insert an attribute
    secondInstances = new Instances(instances);
    Attribute testAtt = new Attribute("Inserted");
    secondInstances.insertAttributeAt(testAtt, 0);
    System.out.println("\nSet with inserted attribute:\n");
    System.out.println(secondInstances);
    System.out.println("\nClass name: "
                       + secondInstances.classAttribute().name());

    // Delete the attribute
    secondInstances.deleteAttributeAt(0);
    System.out.println("\nSet with attribute deleted:\n");
    System.out.println(secondInstances);
    System.out.println("\nClass name: "
                       + secondInstances.classAttribute().name());

    // Test if headers are equal
    System.out.println("\nHeaders equal: "+
                       instances.equalHeaders(secondInstances) + "\n");

    // Print data in internal format.
    System.out.println("\nData (internal values):\n");
    for (i = 0; i < instances.numInstances(); i++) {
      for (j = 0; j < instances.numAttributes(); j++) {
        if (instances.instance(i).isMissing(j)) {
          System.out.print("? ");
        } else {
          System.out.print(instances.instance(i).value(j) + " ");
        }
      }
      System.out.println();
    }

    // Just print header
    System.out.println("\nEmpty dataset:\n");
    empty = new Instances(instances, 0);
    System.out.println(empty);
    System.out.println("\nClass name: "+empty.classAttribute().name());

    // Create copy and rename an attribute and a value (if possible)
    if (empty.classAttribute().isNominal()) {
      Instances copy = new Instances(empty, 0);
      copy.renameAttribute(copy.classAttribute(), "new_name");
      copy.renameAttributeValue(copy.classAttribute(),
                                copy.classAttribute().value(0),
                                "new_val_name");
      System.out.println("\nDataset with names changed:\n" + copy);
      System.out.println("\nOriginal dataset:\n" + empty);
    }

    // Create and prints subset of instances.
    start = instances.numInstances() / 4;
    num = instances.numInstances() / 2;
    System.out.print("\nSubset of dataset: ");
    System.out.println(num + " instances from " + (start + 1)
                       + ". instance");
    secondInstances = new Instances(instances, start, num);
    System.out.println("\nClass name: "
                       + secondInstances.classAttribute().name());

    // Print all instances and their weights (and the sum of weights).
    System.out.println("\nInstances and their weights:\n");
    System.out.println(secondInstances.instancesAndWeights());
    System.out.print("\nSum of weights: ");
    System.out.println(secondInstances.sumOfWeights());

    // Create and print training and test sets for 3-fold
    // cross-validation.
    System.out.println("\nTrain and test folds for 3-fold CV:");
    if (instances.classAttribute().isNominal()) {
      instances.stratify(3);
    }
    for (j = 0; j < 3; j++) {
      train = instances.trainCV(3,j, new Random(1));
      test = instances.testCV(3,j);

      // Print all instances and their weights (and the sum of weights).
      System.out.println("\nTrain: ");
      System.out.println("\nInstances and their weights:\n");
      System.out.println(train.instancesAndWeights());
      System.out.print("\nSum of weights: ");
      System.out.println(train.sumOfWeights());
      System.out.println("\nClass name: "+train.classAttribute().name());
      System.out.println("\nTest: ");
      System.out.println("\nInstances and their weights:\n");
      System.out.println(test.instancesAndWeights());
      System.out.print("\nSum of weights: ");
      System.out.println(test.sumOfWeights());
      System.out.println("\nClass name: "+test.classAttribute().name());
    }

    // Randomize instances and print them.
    System.out.println("\nRandomized dataset:");
    instances.randomize(random);

    // Print all instances and their weights (and the sum of weights).
    System.out.println("\nInstances and their weights:\n");
    System.out.println(instances.instancesAndWeights());
    System.out.print("\nSum of weights: ");
    System.out.println(instances.sumOfWeights());

    // Sort instances according to first attribute and
    // print them.
    System.out.print("\nInstances sorted according to first attribute:\n ");
    instances.sort(0);

    // Print all instances and their weights (and the sum of weights).
    System.out.println("\nInstances and their weights:\n");
    System.out.println(instances.instancesAndWeights());
    System.out.print("\nSum of weights: ");
    System.out.println(instances.sumOfWeights());
  } catch (Exception e) {
    e.printStackTrace();
  }
}

/**
 * Main method for this class. The following calls are possible:
 * <ul>
 *   <li>
 *     <code>weka.core.Instances</code> (no arguments)<br/>
 *     reads a dataset from stdin and prints a summary of it.
 *   </li>
 *   <li>
 *     <code>weka.core.Instances</code> help<br/>
 *     prints a short list of possible commands (<code>-h</code> does the
 *     same) and exits with status 1.
 *   </li>
 *   <li>
 *     <code>weka.core.Instances</code> &lt;filename&gt;<br/>
 *     prints a summary of a set of instances.
 *   </li>
 *   <li>
 *     <code>weka.core.Instances</code> merge &lt;filename1&gt; &lt;filename2&gt;<br/>
 *     merges the two datasets (must have same number of instances) and
 *     outputs the results on stdout.
 *   </li>
 *   <li>
 *     <code>weka.core.Instances</code> append &lt;filename1&gt; &lt;filename2&gt;<br/>
 *     appends the second dataset to the first one (must have same headers) and
 *     outputs the results on stdout.
 *   </li>
 *   <li>
 *     <code>weka.core.Instances</code> randomize &lt;seed&gt; &lt;filename&gt;<br/>
 *     randomizes the dataset with the given seed and outputs the result on stdout.
 *   </li>
 * </ul>
 * The command words <code>merge</code>, <code>append</code> and
 * <code>randomize</code> are matched case-insensitively. Any other
 * argument combination prints the usage text to stderr and exits with
 * status 1. Exceptions are caught and reported on stderr.
 *
 * @param args the commandline parameters
 */
public static void main(String[] args) {

  try {
    Instances data;

    // read from stdin and print statistics
    if (args.length == 0) {
      DataSource source = new DataSource(System.in);
      data = source.getDataSet();
      System.out.println(data.toSummaryString());
    }
    // read file and print statistics
    else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) {
      DataSource source = new DataSource(args[0]);
      data = source.getDataSet();
      System.out.println(data.toSummaryString());
    }
    // read two files, merge them and print result to stdout
    // (equalsIgnoreCase avoids the default-locale dependence of
    // String.toLowerCase(), e.g. the Turkish dotless i)
    else if ((args.length == 3) && ("merge".equalsIgnoreCase(args[0]))) {
      DataSource source1 = new DataSource(args[1]);
      DataSource source2 = new DataSource(args[2]);
      data = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
      System.out.println(data);
    }
    // read two files, append them and print result to stdout
    else if ((args.length == 3) && ("append".equalsIgnoreCase(args[0]))) {
      DataSource source1 = new DataSource(args[1]);
      DataSource source2 = new DataSource(args[2]);
      if (!source1.getStructure().equalHeaders(source2.getStructure())) {
        throw new Exception("The two datasets have different headers!");
      }
      // Print the header once, then stream the rows of both sources.
      Instances structure = source1.getStructure();
      System.out.println(source1.getStructure());
      while (source1.hasMoreElements(structure)) {
        System.out.println(source1.nextElement(structure));
      }
      structure = source2.getStructure();
      while (source2.hasMoreElements(structure)) {
        System.out.println(source2.nextElement(structure));
      }
    }
    // read file and seed value, randomize data and print result to stdout
    else if ((args.length == 3) && ("randomize".equalsIgnoreCase(args[0]))) {
      DataSource source = new DataSource(args[2]);
      data = source.getDataSet();
      data.randomize(new Random(Integer.parseInt(args[1])));
      System.out.println(data);
    }
    // wrong parameters
    else {
      System.err.println(
          "\nUsage:\n"
          + "\tweka.core.Instances help\n"
          + "\tweka.core.Instances <filename>\n"
          + "\tweka.core.Instances merge <filename1> <filename2>\n"
          + "\tweka.core.Instances append <filename1> <filename2>\n"
          + "\tweka.core.Instances randomize <seed> <filename>\n"
      );
      System.exit(1);
    }
  } catch (Exception ex) {
    ex.printStackTrace();
    System.err.println(ex.getMessage());
  }
}
// Closes the enclosing class, whose header lies outside this excerpt.
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -