evaluation.java

来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 2,078 行 · 第 1/5 页

JAVA
2,078
字号
   *   * -l filename <br>   * Loads classifier from the given file. In case the filename ends with   * ".xml" the options are loaded from XML. <p>   *   * -d filename <br>   * Saves classifier built from the training data into the given file. In case    * the filename ends with ".xml" the options are saved XML, not the model. <p>   *   * -v <br>   * Outputs no statistics for the training data. <p>   *   * -o <br>   * Outputs statistics only, not the classifier. <p>   *    * -i <br>   * Outputs detailed information-retrieval statistics per class. <p>   *   * -k <br>   * Outputs information-theoretic statistics. <p>   *   * -p range <br>   * Outputs predictions for test instances, along with the attributes in    * the specified range (and nothing else). Use '-p 0' if no attributes are   * desired. <p>   *   * -r <br>   * Outputs cumulative margin distribution (and nothing else). <p>   *   * -g <br>    * Only for classifiers that implement "Graphable." Outputs   * the graph representation of the classifier (and nothing   * else). <p>   *   * -xml filename | xml-string <br>   * Retrieves the options from the XML-data instead of the command line. <p>   *   * @param classifierString class of machine learning classifier as a string   * @param options the array of string containing the options   * @exception Exception if model could not be evaluated successfully   * @return a string describing the results    */  public static String evaluateModel(String classifierString, 				     String [] options) throws Exception {    Classifier classifier;	     // Create classifier    try {      classifier =       (Classifier)Class.forName(classifierString).newInstance();    } catch (Exception e) {      throw new Exception("Can't find class with name " 			  + classifierString + '.');    }    return evaluateModel(classifier, options);  }  /**   * A test method for this class. Just extracts the first command line   * argument as a classifier class name and calls evaluateModel.   * @param args an array of command line arguments, the first of which   * must be the class name of a classifier.   */  public static void main(String [] args) {    try {      if (args.length == 0) {	throw new Exception("The first argument must be the class name"			    + " of a classifier");      }      String classifier = args[0];      args[0] = "";      System.out.println(evaluateModel(classifier, args));    } catch (Exception ex) {      ex.printStackTrace();      System.err.println(ex.getMessage());    }  }  /**   * Evaluates a classifier with the options given in an array of   * strings. <p>   *   * Valid options are: <p>   *   * -t name of training file <br>   * Name of the file with the training data. (required) <p>   *   * -T name of test file <br>   * Name of the file with the test data. If missing a cross-validation    * is performed. <p>   *   * -c class index <br>   * Index of the class attribute (1, 2, ...; default: last). <p>   *   * -x number of folds <br>   * The number of folds for the cross-validation (default: 10). <p>   *   * -s random number seed <br>   * Random number seed for the cross-validation (default: 1). <p>   *   * -m file with cost matrix <br>   * The name of a file containing a cost matrix. <p>   *   * -l filename <br>   * Loads classifier from the given file. In case the filename ends with   * ".xml" the options are loaded from XML. <p>   *   * -d filename <br>   * Saves classifier built from the training data into the given file. In case    * the filename ends with ".xml" the options are saved XML, not the model. <p>   *   * -v <br>   * Outputs no statistics for the training data. <p>   *   * -o <br>   * Outputs statistics only, not the classifier. <p>   *    * -i <br>   * Outputs detailed information-retrieval statistics per class. <p>   *   * -k <br>   * Outputs information-theoretic statistics. <p>   *   * -p <br>   * Outputs predictions for test instances (and nothing else). <p>   *   * -r <br>   * Outputs cumulative margin distribution (and nothing else). <p>   *   * -g <br>    * Only for classifiers that implement "Graphable." Outputs   * the graph representation of the classifier (and nothing   * else). <p>   *   * -xml filename | xml-string <br>   * Retrieves the options from the XML-data instead of the command line. <p>   *   * @param classifier machine learning classifier   * @param options the array of string containing the options   * @exception Exception if model could not be evaluated successfully   * @return a string describing the results    */  public static String evaluateModel(Classifier classifier,				     String [] options) throws Exception {			          Instances train = null, tempTrain, test = null, template = null;    int seed = 1, folds = 10, classIndex = -1;    String trainFileName, testFileName, sourceClass,       classIndexString, seedString, foldsString, objectInputFileName,       objectOutputFileName, attributeRangeString;    boolean noOutput = false,      printClassifications = false, trainStatistics = true,      printMargins = false, printComplexityStatistics = false,      printGraph = false, classStatistics = false, printSource = false;    StringBuffer text = new StringBuffer();    BufferedReader trainReader = null, testReader = null;    ObjectInputStream objectInputStream = null;    BufferedInputStream xmlInputStream = null;    CostMatrix costMatrix = null;    StringBuffer schemeOptionsText = null;    Range attributesToOutput = null;    long trainTimeStart = 0, trainTimeElapsed = 0,      testTimeStart = 0, testTimeElapsed = 0;    String xml = "";    String[] optionsTmp = null;    Classifier classifierBackup;    try {      // do we get the input from XML instead of normal parameters?      xml = Utils.getOption("xml", options);      if (!xml.equals(""))         options = new XMLOptions(xml).toArray();      // is the input model only the XML-Options, i.e. w/o built model?      optionsTmp = new String[options.length];      for (int i = 0; i < options.length; i++)         optionsTmp[i] = options[i];      if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {         // load options from serialized data ('-l' is automatically erased!)         XMLClassifier xmlserial = new XMLClassifier();         Classifier cl = (Classifier) xmlserial.read(Utils.getOption('l', options));         // merge options         optionsTmp = new String[options.length + cl.getOptions().length];         System.arraycopy(cl.getOptions(), 0, optionsTmp, 0, cl.getOptions().length);         System.arraycopy(options, 0, optionsTmp, cl.getOptions().length, options.length);         options = optionsTmp;      }      // Get basic options (options the same for all schemes)      classIndexString = Utils.getOption('c', options);      if (classIndexString.length() != 0) {	classIndex = Integer.parseInt(classIndexString);      }      trainFileName = Utils.getOption('t', options);       objectInputFileName = Utils.getOption('l', options);      objectOutputFileName = Utils.getOption('d', options);      testFileName = Utils.getOption('T', options);      if (trainFileName.length() == 0) {         if (objectInputFileName.length() == 0) {            throw new Exception("No training file and no object "+            "input file given.");         }          if (testFileName.length() == 0) {            throw new Exception("No training file and no test "+            "file given.");         }      } else if ((objectInputFileName.length() != 0) &&      ((!(classifier instanceof UpdateableClassifier)) ||      (testFileName.length() == 0))) {         throw new Exception("Classifier not incremental, or no " +         "test file provided: can't "+         "use both train and model file.");      }      try {	if (trainFileName.length() != 0) {	  trainReader = new BufferedReader(new FileReader(trainFileName));	}	if (testFileName.length() != 0) {	  testReader = new BufferedReader(new FileReader(testFileName));	}	if (objectInputFileName.length() != 0) {          InputStream is = new FileInputStream(objectInputFileName);          if (objectInputFileName.endsWith(".gz")) {            is = new GZIPInputStream(is);          }     // load from KOML?     if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent()) ) {        objectInputStream = new ObjectInputStream(is);        xmlInputStream    = null;     }     else {        objectInputStream = null;        xmlInputStream    = new BufferedInputStream(is);     }	}      } catch (Exception e) {	throw new Exception("Can't open file " + e.getMessage() + '.');      }      if (testFileName.length() != 0) {	template = test = new Instances(testReader, 1);	if (classIndex != -1) {	  test.setClassIndex(classIndex - 1);	} else {	  test.setClassIndex(test.numAttributes() - 1);	}	if (classIndex > test.numAttributes()) {	  throw new Exception("Index of class attribute too large.");	}      }      if (trainFileName.length() != 0) {	if ((classifier instanceof UpdateableClassifier) &&	    (testFileName.length() != 0)) {	  train = new Instances(trainReader, 1);	} else {	  train = new Instances(trainReader);	}        template = train;	if (classIndex != -1) {	  train.setClassIndex(classIndex - 1);	} else {	  train.setClassIndex(train.numAttributes() - 1);	}	if ((testFileName.length() != 0) && !test.equalHeaders(train)) {	  throw new IllegalArgumentException("Train and test file not compatible!");	}	if (classIndex > train.numAttributes()) {	  throw new Exception("Index of class attribute too large.");	}	//train = new Instances(train);      }      if (template == null) {        throw new Exception("No actual dataset provided to use as template");      }      seedString = Utils.getOption('s', options);      if (seedString.length() != 0) {	seed = Integer.parseInt(seedString);      }      foldsString = Utils.getOption('x', options);      if (foldsString.length() != 0) {	folds = Integer.parseInt(foldsString);      }      costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());      classStatistics = Utils.getFlag('i', options);      noOutput = Utils.getFlag('o', options);      trainStatistics = !Utils.getFlag('v', options);      printComplexityStatistics = Utils.getFlag('k', options);      printMargins = Utils.getFlag('r', options);      printGraph = Utils.getFlag('g', options);      sourceClass = Utils.getOption('z', options);      printSource = (sourceClass.length() != 0);      // Check -p option      try {	attributeRangeString = Utils.getOption('p', options);      }      catch (Exception e) {	throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +			    "It now expects a parameter specifying a range of attributes " +			    "to list with the predictions. Use '-p 0' for none.");      }      if (attributeRangeString.length() != 0) {	printClassifications = true;	if (!attributeRangeString.equals("0")) 	  attributesToOutput = new Range(attributeRangeString);      }      // If a model file is given, we can't process       // scheme-specific options      if (objectInputFileName.length() != 0) {	Utils.checkForRemainingOptions(options);      } else {	// Set options for classifier	if (classifier instanceof OptionHandler) {	  for (int i = 0; i < options.length; i++) {	    if (options[i].length() != 0) {	      if (schemeOptionsText == null) {		schemeOptionsText = new StringBuffer();	      }	      if (options[i].indexOf(' ') != -1) {		schemeOptionsText.append('"' + options[i] + "\" ");	      } else {		schemeOptionsText.append(options[i] + " ");	      }	    }	  }	  ((OptionHandler)classifier).setOptions(options);	}      }      Utils.checkForRemainingOptions(options);    } catch (Exception e) {      throw new Exception("\nWeka exception: " + e.getMessage()			   + makeOptionString(classifier));    }    // Setup up evaluation objects    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);    if (objectInputFileName.length() != 0) {      // Load classifier from file      if (objectInputStream != null) {         classifier = (Classifier) objectInputStream.readObject();         objectInputStream.close();      }      else {         // whether KOML is available has already been checked (objectInputStream would null otherwise)!         classifier = (Classifier) KOML.read(xmlInputStream);         xmlInputStream.close();      }    }    // backup of fully setup classifier for cross-validation    classifierBackup = Classifier.makeCopy(classifier);        // Build the classifier if no object file provided    if ((classifier instanceof UpdateableClassifier) &&	(testFileName.length() != 0) &&	(costMatrix == null) &&	(trainFileName.length() != 0)) {      // Build classifier incrementally      trainingEvaluation.setPriors(train);      testingEvaluation.setPriors(train);      trainTimeStart = System.currentTimeMillis();      if (objectInputFileName.length() == 0) {	classifier.buildClassifier(train);      }      while (train.readInstance(trainReader)) {		trainingEvaluation.updatePriors(train.instance(0));	testingEvaluation.updatePriors(train.instance(0));	((UpdateableClassifier)classifier).	  updateClassifier(train.instance(0));	train.delete(0);      }      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;      trainReader.close();    } else if (objectInputFileName.length() == 0) {      // Build classifier in one go      tempTrain = new Instances(train);      trainingEvaluation.setPriors(tempTrain);      testingEvaluation.setPriors(tempTrain);      trainTimeStart = System.currentTimeMillis();      classifier.buildClassifier(tempTrain);      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;    }     // Save the classifier if an object output file is provided    if (objectOutputFileName.length() != 0) {      OutputStream os = new FileOutputStream(objectOutputFileName);      // binary      if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {         if (objectOutputFileName.endsWith(".gz")) {           os = new GZIPOutputStream(os);         }         ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);         objectOutputStream.writeObject(classifier);         objectOutputStream.flush();         objectOutputStream.close();      }      // KOML/XML      else {         BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);         if (objectOutputFileName.endsWith(".xml")) {            XMLSerialization xmlSerial = new XMLClassifier();            xmlSerial.write(xmlOutputStream, classifier);         }         else         // whether KOML is present has already been checked         // if not present -> ".koml" is interpreted as binary - see above

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?