⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instances.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	       (tokenizer.ttype == '"')) {      tokenizer.ttype = StreamTokenizer.TT_WORD;    } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&	       (tokenizer.sval.equals("?"))){      tokenizer.ttype = '?';    }  }	  /**   * Initializes the StreamTokenizer used for reading the ARFF file.   *   * @param tokenizer the stream tokenizer   */  private void initTokenizer(StreamTokenizer tokenizer){    tokenizer.resetSyntax();             tokenizer.whitespaceChars(0, ' ');        tokenizer.wordChars(' '+1,'\u00FF');    tokenizer.whitespaceChars(',',',');    tokenizer.commentChar('%');    tokenizer.quoteChar('"');    tokenizer.quoteChar('\'');    tokenizer.ordinaryChar('{');    tokenizer.ordinaryChar('}');    tokenizer.eolIsSignificant(true);  }   /**   * Returns string including all instances, their weights and   * their indices in the original dataset.   *   * @return description of instance and its weight as a string   */  private String instancesAndWeights(){    StringBuffer text = new StringBuffer();    for (int i = 0; i < numInstances(); i++) {      text.append(instance(i) + " " + instance(i).weight());      if (i < numInstances() - 1) {	text.append("\n");      }    }    return text.toString();  }    /**   * Implements quicksort.   *   * @param attIndex the attribute's index   * @param lo0 the first index of the subset to be sorted   * @param hi0 the last index of the subset to be sorted   */  private void quickSort(int attIndex, int lo0, int hi0) {        int lo = lo0, hi = hi0;    double mid, midPlus, midMinus;        if (hi0 > lo0) {            // Arbitrarily establishing partition element as the       // midpoint of the array.      mid = instance((lo0 + hi0) / 2).value(attIndex);      midPlus = mid + 1e-6;      midMinus = mid - 1e-6;      // loop through the array until indices cross      while(lo <= hi) {		// find the first element that is greater than or equal to 	// the partition element starting from the left Index.	while ((instance(lo).value(attIndex) < 		midMinus) && (lo < hi0)) {	  ++lo;	}		// find an element that is smaller than or equal to	// the partition element starting from the right Index.	while ((instance(hi).value(attIndex)  > 		midPlus) && (hi > lo0)) {	  --hi;	}		// if the indexes have not crossed, swap	if(lo <= hi) {	  swap(lo,hi);	  ++lo;	  --hi;	}      }            // If the right index has not reached the left side of array      // must now sort the left partition.      if(lo0 < hi) {	quickSort(attIndex,lo0,hi);      }            // If the left index has not reached the right side of array      // must now sort the right partition.      if(lo < hi0) {	quickSort(attIndex,lo,hi0);      }    }  }  /**   * Reads and skips all tokens before next end of line token.   *   * @param tokenizer the stream tokenizer   */  private void readTillEOL(StreamTokenizer tokenizer)        throws IOException{        while (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {};    tokenizer.pushBack();  }  /**   * Help function needed for stratification of set.   *   * @param numFolds the number of folds for the stratification   */  private void stratStep (int numFolds){        FastVector newVec = new FastVector(m_Instances.capacity());    int start = 0, j;    // create stratified batch    while (newVec.size() < numInstances()) {      j = start;      while (j < numInstances()) {	newVec.addElement(instance(j));	j = j + numFolds;      }      start++;    }    m_Instances = newVec;  }    /**   * Swaps two instances in the set.   *   * @param i the first instance's index   * @param j the second instance's index   */  private void swap(int i, int j){        m_Instances.swap(i, j);  }  /**   * Merges two sets of Instances together. The resulting set will have   * all the attributes of the first set plus all the attributes of the    * second set. The number of instances in both sets must be the same.   *   * @param first the first set of Instances   * @param second the second set of Instances   * @return the merged set of Instances   * @exception IllegalArgumentException if the datasets are not the same size   */  public static Instances mergeInstances(Instances first, Instances second) {    if (first.numInstances() != second.numInstances()) {      throw new IllegalArgumentException("Instance sets must be of the same size");    }    // Create the vector of merged attributes    FastVector newAttributes = new FastVector();    for (int i = 0; i < first.numAttributes(); i++) {      newAttributes.addElement(first.attribute(i));    }    for (int i = 0; i < second.numAttributes(); i++) {      newAttributes.addElement(second.attribute(i));    }        // Create the set of Instances    Instances merged = new Instances(first.relationName() + '_'				     + second.relationName(), 				     newAttributes, 				     first.numInstances());    // Merge each instance    for (int i = 0; i < first.numInstances(); i++) {      merged.add(first.instance(i).mergeInstance(second.instance(i)));    }    return merged;  }  /**   * Method for testing this class.   *   * @param argv should contain one element: the name of an ARFF file   */  public static void test(String [] argv) {    Instances instances, secondInstances, train, test, transformed, empty;    Instance instance;    Random random = new Random(2);    Reader reader;    int start, num;    double newWeight;    FastVector testAtts, testVals;    int i,j;        try{      if (argv.length > 1) {	throw (new Exception("Usage: Instances [<filename>]"));      }            // Creating set of instances from scratch      testVals = new FastVector(2);      testVals.addElement("first_value");      testVals.addElement("second_value");      testAtts = new FastVector(2);      testAtts.addElement(new Attribute("nominal_attribute", testVals));      testAtts.addElement(new Attribute("numeric_attribute"));      instances = new Instances("test_set", testAtts, 10);      instances.add(new Instance(instances.numAttributes()));      instances.add(new Instance(instances.numAttributes()));      instances.add(new Instance(instances.numAttributes()));      instances.setClassIndex(0);      System.out.println("\nSet of instances created from scratch:\n");      System.out.println(instances);            if (argv.length == 1) {	String filename = argv[0];	reader = new FileReader(filename);		// Read first five instances and print them	System.out.println("\nFirst five instances from file:\n");	instances = new Instances(reader, 1);	instances.setClassIndex(instances.numAttributes() - 1);	i = 0;	while ((i < 5) && (instances.readInstance(reader))) {	  i++;	}	System.out.println(instances);	// Read all the instances in the file	reader = new FileReader(filename);	instances = new Instances(reader);	// Make the last attribute be the class 	instances.setClassIndex(instances.numAttributes() - 1);		// Print header and instances.	System.out.println("\nDataset:\n");	System.out.println(instances);	System.out.println("\nClass index: "+instances.classIndex());      }            // Test basic methods based on class index.      System.out.println("\nClass name: "+instances.classAttribute().name());      System.out.println("\nClass index: "+instances.classIndex());      System.out.println("\nClass is nominal: " +			 instances.classAttribute().isNominal());      System.out.println("\nClass is numeric: " +			 instances.classAttribute().isNumeric());      System.out.println("\nClasses:\n");      for (i = 0; i < instances.numClasses(); i++) {	System.out.println(instances.classAttribute().value(i));      }      System.out.println("\nClass values and labels of instances:\n");      for (i = 0; i < instances.numInstances(); i++) {	Instance inst = instances.instance(i);	System.out.print(inst.classValue() + "\t");	System.out.print(inst.toString(inst.classIndex()));	if (instances.instance(i).classIsMissing()) {	  System.out.println("\tis missing");	} else {	  System.out.println();	}      }            // Create random weights.      System.out.println("\nCreating random weights for instances.");      for (i = 0; i < instances.numInstances(); i++) {	instances.instance(i).setWeight(random.nextDouble());       }            // Print all instances and their weights (and the sum of weights).      System.out.println("\nInstances and their weights:\n");      System.out.println(instances.instancesAndWeights());      System.out.print("\nSum of weights: ");      System.out.println(instances.sumOfWeights());            // Insert an attribute      secondInstances = new Instances(instances);      Attribute testAtt = new Attribute("Inserted");      secondInstances.insertAttributeAt(testAtt, 0);      System.out.println("\nSet with inserted attribute:\n");      System.out.println(secondInstances);      System.out.println("\nClass name: "			 + secondInstances.classAttribute().name());            // Delete the attribute      secondInstances.deleteAttributeAt(0);      System.out.println("\nSet with attribute deleted:\n");      System.out.println(secondInstances);      System.out.println("\nClass name: "			 + secondInstances.classAttribute().name());            // Test if headers are equal      System.out.println("\nHeaders equal: "+			 instances.equalHeaders(secondInstances) + "\n");            // Print data in internal format.      System.out.println("\nData (internal values):\n");      for (i = 0; i < instances.numInstances(); i++) {	for (j = 0; j < instances.numAttributes(); j++) {	  if (instances.instance(i).isMissing(j)) {	    System.out.print("? ");	  } else {	    System.out.print(instances.instance(i).value(j) + " ");	  }	}	System.out.println();      }            // Just print header      System.out.println("\nEmpty dataset:\n");      empty = new Instances(instances, 0);      System.out.println(empty);      System.out.println("\nClass name: "+empty.classAttribute().name());      // Create copy and rename an attribute and a value (if possible)      if (empty.classAttribute().isNominal()) {	Instances copy = new Instances(empty, 0);	copy.renameAttribute(copy.classAttribute(), "new_name");	copy.renameAttributeValue(copy.classAttribute(), 				  copy.classAttribute().value(0), 				  "new_val_name");	System.out.println("\nDataset with names changed:\n" + copy);	System.out.println("\nOriginal dataset:\n" + empty);      }      // Create and prints subset of instances.      start = instances.numInstances() / 4;      num = instances.numInstances() / 2;      System.out.print("\nSubset of dataset: ");      System.out.println(num + " instances from " + (start + 1) 			 + ". instance");      secondInstances = new Instances(instances, start, num);      System.out.println("\nClass name: "			 + secondInstances.classAttribute().name());      // Print all instances and their weights (and the sum of weights).      System.out.println("\nInstances and their weights:\n");      System.out.println(secondInstances.instancesAndWeights());      System.out.print("\nSum of weights: ");      System.out.println(secondInstances.sumOfWeights());            // Create and print training and test sets for 3-fold      // cross-validation.      System.out.println("\nTrain and test folds for 3-fold CV:");      if (instances.classAttribute().isNominal()) {	instances.stratify(3);      }      for (j = 0; j < 3; j++) {        train = instances.trainCV(3,j);	test = instances.testCV(3,j);                      	// Print all instances and their weights (and the sum of weights).	System.out.println("\nTrain: ");	System.out.println("\nInstances and their weights:\n");	System.out.println(train.instancesAndWeights());	System.out.print("\nSum of weights: ");	System.out.println(train.sumOfWeights());	System.out.println("\nClass name: "+train.classAttribute().name());	System.out.println("\nTest: ");	System.out.println("\nInstances and their weights:\n");	System.out.println(test.instancesAndWeights());	System.out.print("\nSum of weights: ");	System.out.println(test.sumOfWeights());	System.out.println("\nClass name: "+test.classAttribute().name());      }      // Randomize instances and print them.      System.out.println("\nRandomized dataset:");      instances.randomize(random);            // Print all instances and their weights (and the sum of weights).      System.out.println("\nInstances and their weights:\n");      System.out.println(instances.instancesAndWeights());      System.out.print("\nSum of weights: ");      System.out.println(instances.sumOfWeights());      // Sort instances according to first attribute and      // print them.      System.out.print("\nInstances sorted according to first attribute:\n ");      instances.sort(0);              // Print all instances and their weights (and the sum of weights).      System.out.println("\nInstances and their weights:\n");      System.out.println(instances.instancesAndWeights());      System.out.print("\nSum of weights: ");      System.out.println(instances.sumOfWeights());    } catch (Exception e) {      e.printStackTrace();     }  }  /**   * Main method for this class -- just prints a summary of a set   * of instances.   *   * @param argv should contain one element: the name of an ARFF file   */  public static void main(String [] args) {    try {      Reader r = null;      if (args.length > 1) {	throw (new Exception("Usage: Instances <filename>"));      } else if (args.length == 0) {        r = new BufferedReader(new InputStreamReader(System.in));      } else {        r = new BufferedReader(new FileReader(args[0]));      }      Instances i = new Instances(r);      System.out.println(i.toSummaryString());    } catch (Exception ex) {      System.err.println(ex.getMessage());    }  }}     

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -