evaluation.java
Java source code (page 1 of 5) for the complete evaluation workflow of the data mining software ALPHAMINER
	if (classIndex != -1) {
	  train.setClassIndex(classIndex - 1);
	} else {
	  train.setClassIndex(train.numAttributes() - 1);
	}
	testTimeStart = System.currentTimeMillis();
	while (train.readInstance(trainReader)) {

	  trainingEvaluation.evaluateModelOnce((Classifier)classifier, train.instance(0));
	  train.delete(0);
	}
	testTimeElapsed = System.currentTimeMillis() - testTimeStart;
	trainReader.close();
      } else {
	testTimeStart = System.currentTimeMillis();
	trainingEvaluation.evaluateModel(classifier, 
					 train);
	testTimeElapsed = System.currentTimeMillis() - testTimeStart;
      }

      // Print the results of the training evaluation
      if (printMargins) {
	return trainingEvaluation.toCumulativeMarginDistributionString();
      } else {
	text.append("\nTime taken to build model: " +
		    Utils.doubleToString(trainTimeElapsed / 1000.0,2) +
		    " seconds");
	text.append("\nTime taken to test model on training data: " +
		    Utils.doubleToString(testTimeElapsed / 1000.0,2) +
		    " seconds");
	text.append(trainingEvaluation.
		    toSummaryString("\n\n=== Error on training" + 
				    " data ===\n", printComplexityStatistics));
	if (template.classAttribute().isNominal()) {
	  if (classStatistics) {
	    text.append("\n\n" + trainingEvaluation.toClassDetailsString());
	  }
	  text.append("\n\n" + trainingEvaluation.toMatrixString());
	}
	
      }
    }

    // Compute proper error estimates
    if (testFileName.length() != 0) {

      // Testing is on the supplied test data
      while (test.readInstance(testReader)) {
	  
	testingEvaluation.evaluateModelOnce((Classifier)classifier, 
                                            test.instance(0));
	test.delete(0);
      }
      testReader.close();

      text.append("\n\n" + testingEvaluation.
		  toSummaryString("=== Error on test data ===\n",
				  printComplexityStatistics));
    } else if (trainFileName.length() != 0) {

      // Testing is via cross-validation on training data
      Random random = new Random(seed);
      testingEvaluation.crossValidateModel(classifier, train, folds, random);
      if (template.classAttribute().isNumeric()) {
	text.append("\n\n\n" + testingEvaluation.
		    toSummaryString("=== Cross-validation ===\n",
				    printComplexityStatistics));
      } else {
	text.append("\n\n\n" + testingEvaluation.
		    toSummaryString("=== Stratified " + 
				    "cross-validation ===\n",
				    printComplexityStatistics));
      }
    }
    if (template.classAttribute().isNominal()) {
      if (classStatistics) {
	text.append("\n\n" + testingEvaluation.toClassDetailsString());
      }
      text.append("\n\n" + testingEvaluation.toMatrixString());
    }
    return text.toString();
  }


  /**
   * Attempts to load a cost matrix.
   *
   * @param costFileName the filename of the cost matrix
   * @param numClasses the number of classes that should be in the cost matrix
   * (only used if the cost file is in old format).
   * @return a <code>CostMatrix</code> value, or null if costFileName is empty
   * @exception Exception if an error occurs.
   */
  protected static CostMatrix handleCostOption(String costFileName, 
                                             int numClasses) 
    throws Exception {

    if ((costFileName != null) && (costFileName.length() != 0)) {
      System.out.println(
           "NOTE: The behaviour of the -m option has changed between WEKA 3.0"
           +" and WEKA 3.1. -m now carries out cost-sensitive *evaluation*"
           +" only. For cost-sensitive *prediction*, use one of the"
           +" cost-sensitive metaschemes such as"
           +" weka.classifiers.meta.CostSensitiveClassifier or"
           +" weka.classifiers.meta.MetaCost");

      Reader costReader = null;
      try {
        costReader = new BufferedReader(new FileReader(costFileName));
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      try {
        // First try as a proper cost matrix format
        return new CostMatrix(costReader);
      } catch (Exception ex) {
        try {
          // Now try as the poxy old format :-)
          //System.err.println("Attempting to read old format cost file");
          try {
            costReader.close(); // Close the old one
            costReader = new BufferedReader(new FileReader(costFileName));
          } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
          }
          CostMatrix costMatrix = new CostMatrix(numClasses);
          //System.err.println("Created default cost matrix");
          costMatrix.readOldFormat(costReader);
          return costMatrix;
          //System.err.println("Read old format");
        } catch (Exception e2) {
          // re-throw the original exception
          //System.err.println("Re-throwing original exception");
          throw ex;
        }
      }
    } else {
      return null;
    }
  }
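
  /*
   * Illustrative usage sketch, not part of the original source: one plausible
   * way to call handleCostOption() above. The file name "costs.matrix" and the
   * class count of 3 are hypothetical placeholders; the method returns null
   * when the file name is empty and falls back to the old cost-file format if
   * the new CostMatrix format cannot be parsed.
   */
  protected static CostMatrix exampleLoadCostMatrix() throws Exception {
    CostMatrix costs = handleCostOption("costs.matrix", 3);
    if (costs != null) {
      System.out.println("Loaded cost matrix:\n" + costs);
    }
    return costs;
  }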

  /**
   * Evaluates the classifier on a given set of instances. Note that
   * the data must have exactly the same format (e.g. order of
   * attributes) as the data used to train the classifier! Otherwise
   * the results will generally be meaningless.
   *
   * @param classifier machine learning classifier
   * @param data set of test instances for evaluation
   * @exception Exception if model could not be evaluated 
   * successfully 
   */
  public void evaluateModel(Classifier classifier,
			    Instances data) throws Exception {
    
    double [] predicted;

    for (int i = 0; i < data.numInstances(); i++) {
      evaluateModelOnce((Classifier)classifier, 
			data.instance(i));
    }
  }
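
  /*
   * Illustrative usage sketch, not part of the original source: evaluating an
   * already trained classifier on a separate test set with evaluateModel()
   * above. The file name "test.arff" is a hypothetical placeholder, and this
   * evaluation object is assumed to have been constructed from the training
   * data so that class priors and the confusion matrix are set up consistently.
   */
  public String exampleEvaluateOnTestSet(Classifier trainedClassifier)
    throws Exception {

    // Read the test instances; the class attribute must be in the same
    // position as in the training data (here: the last attribute).
    Instances testData =
      new Instances(new BufferedReader(new FileReader("test.arff")));
    testData.setClassIndex(testData.numAttributes() - 1);

    // Accumulate statistics for every test instance, then report them.
    evaluateModel(trainedClassifier, testData);
    return toSummaryString("=== Error on test data ===\n", false);
  }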
  //<<30/01/2005, Frank J. Xu
  //Modified functions for model evaluation used in ETI KBBI Platform.
  /**
   * Evaluates the classifier on a given set of instances. Note that
   * the data must have exactly the same format (e.g. order of
   * attributes) as the data used to train the classifier! Otherwise
   * the results will generally be meaningless.
   *
   * @param classifier machine learning classifier
   * @param data set of test instances for evaluation
   * @exception Exception if model could not be evaluated 
   * successfully 
   */
  public void eti_evaluateModel(Classifier classifier,
			    Instances data,
				double threshold,
				double confidence) throws Exception {
    
    double [] predicted;

    for (int i = 0; i < data.numInstances(); i++) {
      eti_evaluateModelOnce((Classifier)classifier, 
			data.instance(i),
			threshold,
			confidence);
    }
  }
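
  /*
   * Illustrative usage sketch, not part of the original source: running the ETI
   * variant over a test set with a hypothetical decision threshold of 0.7
   * (applied only to two-class Logistic models) and a confidence cut-off of
   * 0.9, then reading back the confidence-qualified true positive ratio for
   * class 0 via eti_truePositiveConfidence() defined further below.
   */
  public double exampleEtiEvaluate(Classifier trainedClassifier,
                                   Instances testData) throws Exception {
    eti_evaluateModel(trainedClassifier, testData, 0.7, 0.9);
    return eti_truePositiveConfidence(0);
  }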
  /**
   * Evaluates the classifier on a single instance.
   *
   * @param classifier machine learning classifier
   * @param instance the test instance to be classified
   * @return the prediction made by the classifier
   * @exception Exception if model could not be evaluated 
   * successfully or the data contains string attributes
   */
  public double eti_evaluateModelOnce(Classifier classifier,
				  Instance instance,
				  double threshold,
				  double confidence) throws Exception {
  
    Instance classMissing = (Instance)instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
      double [] dist = classifier.distributionForInstance(classMissing);
      // For a Logistic classifier with two classes, apply the decision
      // threshold used in the KBBI platform instead of the max-probability rule.
      if (classifier instanceof Logistic && dist.length == 2) {
        pred = dist[0] > threshold ? 0 : 1;
      } else {
        pred = Utils.maxIndex(dist);
      }
      if (dist[(int)pred] <= 0) {
        pred = Instance.missingValue();
      }
      eti_updateStatsForClassifier(dist, instance, (int)pred, confidence);
    } else {
      pred = classifier.classifyInstance(classMissing);
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }  
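
  /*
   * Illustrative sketch, not part of the original source, of the threshold rule
   * used by eti_evaluateModelOnce() above for two-class Logistic models: class 0
   * is predicted only when its probability exceeds the threshold. The numbers
   * below are hypothetical.
   */
  private static double exampleThresholdDecision() {
    double[] dist = {0.55, 0.45};  // P(class 0), P(class 1) from a Logistic model
    double threshold = 0.7;        // stricter than the usual 0.5 cut-off
    // 0.55 is not greater than 0.7, so class 1 is predicted even though
    // class 0 has the higher probability.
    return dist[0] > threshold ? 0 : 1;
  }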
  
  /**
   * Updates all the statistics about a classifiers performance for 
   * the current test instance.
   *
   * @param predictedDistribution the probabilities assigned to 
   * each class
   * @param instance the instance to be classified
   * @exception Exception if the class of the instance is not
   * set
   */
  protected void eti_updateStatsForClassifier(double [] predictedDistribution,
					Instance instance,
					int predictedIndex,
					double confidence)
       throws Exception {

    int actualClass = (int)instance.classValue();
    double costFactor = 1;

    if (!instance.classIsMissing()) {
      updateMargins(predictedDistribution, actualClass, instance.weight());

      // Determine the predicted class (doesn't detect multiple 
      // classifications)
      int predictedClass = -1;
      double bestProb = 0.0;
      for(int i = 0; i < m_NumClasses; i++) {
	if (predictedDistribution[i] > bestProb) {
	  predictedClass = i;
	  bestProb = predictedDistribution[i];
	}
      }

      m_WithClass += instance.weight();

      // Determine misclassification cost
      if (m_CostMatrix != null) {
        if (predictedClass < 0) {
          // For missing predictions, we assume the worst possible cost.
          // This is pretty harsh.
          // Perhaps we could take the negative of the cost of a correct
          // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
          // although often this will be zero
          m_TotalCost += instance.weight()
            * m_CostMatrix.getMaxCost(actualClass);
        } else {
          m_TotalCost += instance.weight() 
            * m_CostMatrix.getElement(actualClass, predictedClass);
        }
      }

      // Update counts when no class was predicted
      if (predictedClass < 0) {
	m_Unclassified += instance.weight();
	return;
      }

      double predictedProb = Math.max(MIN_SF_PROB,
				      predictedDistribution[actualClass]);
      double priorProb = Math.max(MIN_SF_PROB,
				  m_ClassPriors[actualClass]
				  / m_ClassPriorsSum);
      if (predictedProb >= priorProb) {
	m_SumKBInfo += (Utils.log2(predictedProb) - 
			Utils.log2(priorProb))
	  * instance.weight();
      } else {
	m_SumKBInfo -= (Utils.log2(1.0-predictedProb) - 
			Utils.log2(1.0-priorProb))
	  * instance.weight();
      }

      m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
      m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

      updateNumericScores(predictedDistribution, 
			  makeDistribution(instance.classValue()), 
			  instance.weight());

      // Update other stats
      m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
      if (predictedClass != actualClass) {
        m_Incorrect += instance.weight();
      } else {
        m_Correct += instance.weight();
        // Count correct predictions made with at least the specified confidence.
        if (predictedDistribution[predictedIndex] >= confidence) {
          m_CorrectNumWithSpecifiedConf[actualClass]++;
        }
      }
    } else {
      m_MissingClass += instance.weight();
    }
  }  
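
  /*
   * Illustrative sketch, not part of the original source, of the KB-information
   * update performed above: when the classifier assigns the true class a higher
   * probability than its prior, the weighted log-ratio is credited to
   * m_SumKBInfo, otherwise the corresponding penalty is subtracted. The inputs
   * are hypothetical.
   */
  private static double exampleKBInformationDelta(double predictedProb,
                                                  double priorProb,
                                                  double weight) {
    if (predictedProb >= priorProb) {
      return (Utils.log2(predictedProb) - Utils.log2(priorProb)) * weight;
    } else {
      return -(Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * weight;
    }
  }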
    
  /**
   * Calculates, for a particular class, the fraction of correctly classified
   * positives that were predicted with at least the specified confidence.
   * This is defined as<p>
   * <pre>
   * correctly classified positives with confidence level >= specified value
   * ------------------------------------------------------------------------
   *                     correctly classified positives
   * </pre>
   *
   * @param classIndex the index of the class to consider as "positive"
   * @return the confidence-qualified true positive ratio
   */
  public double eti_truePositiveConfidence(int classIndex) {

    // Correctly classified positives sit on the diagonal of the confusion matrix.
    double correct = m_ConfusionMatrix[classIndex][classIndex];
    if (correct == 0) {
      return 0;
    }
    return m_CorrectNumWithSpecifiedConf[classIndex] / correct;
  }
  //30/01/2005, Frank J. Xu>>
  /**
   * Evaluates the classifier on a single instance.
   *
   * @param classifier machine learning classifier
   * @param instance the test instance to be classified
   * @return the prediction made by the classifier
   * @exception Exception if model could not be evaluated 
   * successfully or the data contains string attributes
   */
  public double evaluateModelOnce(Classifier classifier,
				  Instance instance) throws Exception {
  
    Instance classMissing = (Instance)instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
	double [] dist = classifier.distributionForInstance(classMissing);
	pred = Utils.maxIndex(dist);
	if (dist[(int)pred] <= 0) {
	  pred = Instance.missingValue();
	}
	updateStatsForClassifier(dist, instance);
    } else {
      pred = classifier.classifyInstance(classMissing);
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }

  /**
   * Evaluates the supplied distribution on a single instance.
   *
   * @param dist the supplied distribution
   * @param instance the test instance to be classified
   * @exception Exception if model could not be evaluated 
   * successfully
   */
  public double evaluateModelOnce(double [] dist, 
				  Instance instance) throws Exception {
    double pred;
    if (m_ClassIsNominal) {
      pred = Utils.maxIndex(dist);
      if (dist[(int)pred] <= 0) {
	pred = Instance.missingValue();
      }
      updateStatsForClassifier(dist, instance);
    } else {
      pred = dist[0];
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }
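
  /*
   * Illustrative usage sketch, not part of the original source: feeding an
   * externally produced class distribution into evaluateModelOnce(double[],
   * Instance) above, e.g. when predictions come from outside WEKA. The
   * three-class distribution is hypothetical and must refer to the same
   * nominal class attribute as the test instance.
   */
  public double exampleEvaluatePrecomputedDistribution(Instance testInstance)
    throws Exception {

    double[] externalDist = {0.1, 0.7, 0.2};
    // Updates the same statistics as evaluating a Classifier would.
    return evaluateModelOnce(externalDist, testInstance);
  }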

  /**
   * Evaluates the supplied prediction on a single instance.
   *
   * @param prediction the supplied prediction
