📄 ClassifierSplitEvaluator.java
      // (listing begins mid-method; the opening of this additional-measures setter
      //  is not included in the excerpt)
      if (m_Template instanceof AdditionalMeasureProducer) {
        Enumeration en = ((AdditionalMeasureProducer)m_Template).
          enumerateMeasures();
        while (en.hasMoreElements()) {
          String mname = (String)en.nextElement();
          for (int j = 0; j < m_AdditionalMeasures.length; j++) {
            if (mname.compareToIgnoreCase(m_AdditionalMeasures[j]) == 0) {
              m_doesProduce[j] = true;
            }
          }
        }
      }
    } else {
      m_doesProduce = null;
    }
  }

  /**
   * Returns an enumeration of any additional measure names that might be
   * in the classifier
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector();
    if (m_Template instanceof AdditionalMeasureProducer) {
      Enumeration en = ((AdditionalMeasureProducer)m_Template).
        enumerateMeasures();
      while (en.hasMoreElements()) {
        String mname = (String)en.nextElement();
        newVector.addElement(mname);
      }
    }
    return newVector.elements();
  }

  /**
   * Returns the value of the named measure
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    if (m_Template instanceof AdditionalMeasureProducer) {
      if (m_Classifier == null) {
        throw new IllegalArgumentException("ClassifierSplitEvaluator: " +
          "Can't return result for measure, " +
          "classifier has not been built yet.");
      }
      return ((AdditionalMeasureProducer)m_Classifier).
        getMeasure(additionalMeasureName);
    } else {
      throw new IllegalArgumentException("ClassifierSplitEvaluator: " +
        "Can't return value for : " + additionalMeasureName +
        ". " + m_Template.getClass().getName() + " " +
        "is not an AdditionalMeasureProducer");
    }
  }

  /**
   * Gets the data types of each of the key columns produced for a single run.
   * The number of key fields must be constant
   * for a given SplitEvaluator.
   *
   * @return an array containing objects of the type of each key column. The
   *         objects should be Strings, or Doubles.
   */
  public Object [] getKeyTypes() {
    Object [] keyTypes = new Object[KEY_SIZE];
    keyTypes[0] = "";
    keyTypes[1] = "";
    keyTypes[2] = "";
    return keyTypes;
  }

  /**
   * Gets the names of each of the key columns produced for a single run.
   * The number of key fields must be constant
   * for a given SplitEvaluator.
   *
   * @return an array containing the name of each key column
   */
  public String [] getKeyNames() {
    String [] keyNames = new String[KEY_SIZE];
    keyNames[0] = "Scheme";
    keyNames[1] = "Scheme_options";
    keyNames[2] = "Scheme_version_ID";
    return keyNames;
  }

  /**
   * Gets the key describing the current SplitEvaluator. For example,
   * this may contain the name of the classifier used for classifier
   * predictive evaluation. The number of key fields must be constant
   * for a given SplitEvaluator.
   *
   * @return an array of objects containing the key.
   */
  public Object [] getKey() {
    Object [] key = new Object[KEY_SIZE];
    key[0] = m_Template.getClass().getName();
    key[1] = m_ClassifierOptions;
    key[2] = m_ClassifierVersion;
    return key;
  }

  /**
   * Gets the data types of each of the result columns produced for a
   * single run. The number of result fields must be constant
   * for a given SplitEvaluator.
   *
   * @return an array containing objects of the type of each result column.
   *         The objects should be Strings, or Doubles.
   */
  public Object [] getResultTypes() {
    int addm = (m_AdditionalMeasures != null) ?
      m_AdditionalMeasures.length : 0;
    int overall_length = RESULT_SIZE + addm;
    overall_length += NUM_IR_STATISTICS;
    if (getAttributeID() >= 0) overall_length += 1;
    if (getPredTargetColumn()) overall_length += 2;

    Object [] resultTypes = new Object[overall_length];
    Double doub = new Double(0);
    int current = 0;
    // train/test set sizes
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // Basic performance stats - right vs wrong
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // Sensitive stats - certainty of predictions
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // SF stats
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // K&B stats
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // IR stats
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // Timing stats
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    resultTypes[current++] = doub;
    // ID/Targets/Predictions
    if (getAttributeID() >= 0) resultTypes[current++] = "";
    if (getPredTargetColumn()) {
      resultTypes[current++] = "";
      resultTypes[current++] = "";
    }
    // Classifier defined extras
    resultTypes[current++] = "";
    // add any additional measures
    for (int i = 0; i < addm; i++) {
      resultTypes[current++] = doub;
    }
    if (current != overall_length) {
      throw new Error("ResultTypes didn't fit RESULT_SIZE");
    }
    return resultTypes;
  }

  /**
   * Gets the names of each of the result columns produced for a single run.
   * The number of result fields must be constant
   * for a given SplitEvaluator.
   *
   * @return an array containing the name of each result column
   */
  public String [] getResultNames() {
    int addm = (m_AdditionalMeasures != null) ?
      m_AdditionalMeasures.length : 0;
    int overall_length = RESULT_SIZE + addm;
    overall_length += NUM_IR_STATISTICS;
    if (getAttributeID() >= 0) overall_length += 1;
    if (getPredTargetColumn()) overall_length += 2;

    String [] resultNames = new String[overall_length];
    int current = 0;
    resultNames[current++] = "Number_of_training_instances";
    resultNames[current++] = "Number_of_testing_instances";
    // Basic performance stats - right vs wrong
    resultNames[current++] = "Number_correct";
    resultNames[current++] = "Number_incorrect";
    resultNames[current++] = "Number_unclassified";
    resultNames[current++] = "Percent_correct";
    resultNames[current++] = "Percent_incorrect";
    resultNames[current++] = "Percent_unclassified";
    resultNames[current++] = "Kappa_statistic";
    // Sensitive stats - certainty of predictions
    resultNames[current++] = "Mean_absolute_error";
    resultNames[current++] = "Root_mean_squared_error";
    resultNames[current++] = "Relative_absolute_error";
    resultNames[current++] = "Root_relative_squared_error";
    // SF stats
    resultNames[current++] = "SF_prior_entropy";
    resultNames[current++] = "SF_scheme_entropy";
    resultNames[current++] = "SF_entropy_gain";
    resultNames[current++] = "SF_mean_prior_entropy";
    resultNames[current++] = "SF_mean_scheme_entropy";
    resultNames[current++] = "SF_mean_entropy_gain";
    // K&B stats
    resultNames[current++] = "KB_information";
    resultNames[current++] = "KB_mean_information";
    resultNames[current++] = "KB_relative_information";
    // IR stats
    resultNames[current++] = "True_positive_rate";
    resultNames[current++] = "Num_true_positives";
    resultNames[current++] = "False_positive_rate";
    resultNames[current++] = "Num_false_positives";
    resultNames[current++] = "True_negative_rate";
    resultNames[current++] = "Num_true_negatives";
    resultNames[current++] = "False_negative_rate";
    resultNames[current++] = "Num_false_negatives";
    resultNames[current++] = "IR_precision";
    resultNames[current++] = "IR_recall";
    resultNames[current++] = "F_measure";
    resultNames[current++] = "Area_under_ROC";
    // Timing stats
    resultNames[current++] = "Elapsed_Time_training";
    resultNames[current++] = "Elapsed_Time_testing";
    resultNames[current++] = "UserCPU_Time_training";
    resultNames[current++] = "UserCPU_Time_testing";
    // ID/Targets/Predictions
    if (getAttributeID() >= 0) resultNames[current++] = "Instance_ID";
    if (getPredTargetColumn()) {
      resultNames[current++] = "Targets";
      resultNames[current++] = "Predictions";
    }
    // Classifier defined extras
    resultNames[current++] = "Summary";
    // add any additional measures
    for (int i = 0; i < addm; i++) {
      resultNames[current++] = m_AdditionalMeasures[i];
    }
    if (current != overall_length) {
      throw new Error("ResultNames didn't fit RESULT_SIZE");
    }
    return resultNames;
  }

  /**
   * Gets the results for the supplied train and test datasets. Now performs
   * a deep copy of the classifier before it is built and evaluated (just in case
   * the classifier is not initialized properly in buildClassifier()).
   *
   * @param train the training Instances.
   * @param test the testing Instances.
   * @return the results stored in an array. The objects stored in
   *         the array may be Strings, Doubles, or null (for the missing value).
   * @throws Exception if a problem occurs while getting the results
   */
  public Object [] getResult(Instances train, Instances test)
    throws Exception {

    if (train.classAttribute().type() != Attribute.NOMINAL) {
      throw new Exception("Class attribute is not nominal!");
    }
    if (m_Template == null) {
      throw new Exception("No classifier has been specified");
    }
    int addm = (m_AdditionalMeasures != null) ?
      m_AdditionalMeasures.length : 0;
    int overall_length = RESULT_SIZE + addm;
    overall_length += NUM_IR_STATISTICS;
    if (getAttributeID() >= 0) overall_length += 1;
    if (getPredTargetColumn()) overall_length += 2;

    ThreadMXBean thMonitor = ManagementFactory.getThreadMXBean();
    boolean canMeasureCPUTime = thMonitor.isThreadCpuTimeSupported();
    if (!thMonitor.isThreadCpuTimeEnabled())
      thMonitor.setThreadCpuTimeEnabled(true);

    Object [] result = new Object[overall_length];
    Evaluation eval = new Evaluation(train);
    m_Classifier = Classifier.makeCopy(m_Template);
    double [] predictions;
    long thID = Thread.currentThread().getId();
    long CPUStartTime = -1, trainCPUTimeElapsed = -1, testCPUTimeElapsed = -1,
      trainTimeStart, trainTimeElapsed, testTimeStart, testTimeElapsed;

    // training classifier
    trainTimeStart = System.currentTimeMillis();
    if (canMeasureCPUTime)
      CPUStartTime = thMonitor.getThreadUserTime(thID);
    m_Classifier.buildClassifier(train);
    if (canMeasureCPUTime)
      trainCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime;
    trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

    // testing classifier
    // (listing ends here; the remainder of getResult() is not included in the excerpt)
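For context, here is a minimal driver sketch showing how an evaluator like this is typically exercised on a single train/test split: the column names from getResultNames() and the values from getResult() line up index by index, so they can be printed side by side. This sketch is not part of the original file. It assumes the class lives in weka.experiment and exposes a setClassifier(Classifier) setter, as in stock Weka distributions (neither appears in this excerpt), and that the ARFF paths passed on the command line are placeholders.

import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.experiment.ClassifierSplitEvaluator;

public class SplitEvaluatorDemo {
  public static void main(String[] args) throws Exception {
    // Load a pre-made train/test split (ARFF file paths supplied as arguments).
    Instances train = DataSource.read(args[0]);
    Instances test = DataSource.read(args[1]);
    // getResult() requires a nominal class attribute; use the last column here.
    train.setClassIndex(train.numAttributes() - 1);
    test.setClassIndex(test.numAttributes() - 1);

    ClassifierSplitEvaluator se = new ClassifierSplitEvaluator();
    se.setClassifier(new J48());  // assumed setter for the m_Template classifier

    // Names and values for this single split correspond one-to-one; values
    // may be Doubles, Strings, or null for missing entries.
    String[] names = se.getResultNames();
    Object[] values = se.getResult(train, test);
    for (int i = 0; i < names.length; i++) {
      System.out.println(names[i] + " = " + values[i]);
    }
  }
}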