dedupingprcurvecvresultproducersplit.java

来自「wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器」· Java 代码 · 共 1,053 行 · 第 1/3 页
JAVA
1,053 行
						 "weka.clusterers.");	      resultName = Utils.removeSubstring(resultName, 						 "weka.filters.");	      resultName = Utils.removeSubstring(resultName, 						 "weka.attributeSelection.");	      resultName = Utils.removeSubstring(resultName, 						 "weka.deduping.");	      m_zipDest.zipit(m_splitEvaluator.getRawResultOutput(), resultName);	    }	    m_resultListener.acceptResult(this, key, results);	  } catch (Exception ex) {	    // Save the train and test datasets for debugging purposes?	    throw ex;	  }	}      }    }  }  /** Given a set of instances with the class attribute containing true   * objectID, create a list of folds using the preferred method   *   * @param runInstances a set of instances with class labels   * @param numFolds the number of folds to create   * @return a list of Instances objects, every Instances contains a fold   */  ArrayList createFoldList(Instances runInstances, int numFolds) {    ArrayList foldList = null;    switch (m_foldCreationMode) {    case FOLD_CREATION_MODE_STRATIFIED:      foldList =  createFoldListStratified(runInstances, numFolds);      break;    case FOLD_CREATION_MODE_RANDOM:      foldList =  createFoldListRandom(runInstances, numFolds);      break;    default:      System.err.println("Panic!  
Unknown fold creation mode: " + m_foldCreationMode);      System.exit(1);    }    return foldList;  }    /** Given a set of instances with the class attribute containing true   * objectID, create a list of folds containing an approximately equal number of class values (equivalence classes)   * Objects with the same class ID *always* end in the same fold   *   * @param runInstances a set of instances with class labels   * @param numFolds the number of folds to create   * @return a list of Instances objects, every Instances contains a fold   */  ArrayList createFoldListStratified(Instances runInstances, int numFolds) {    HashMap classFoldHash = new HashMap();    ArrayList foldList = new ArrayList(numFolds);    int numInstances = runInstances.numInstances();    Random rand = new Random(numFolds + numInstances);    // initialize the folds    for (int i=0; i < numFolds; i++) {      Instances fold = new Instances(runInstances, numInstances/numFolds);      foldList.add(fold);    }    // assign each class to a random fold    for (int i=0; i < runInstances.numInstances(); i++) {      Instance instance = runInstances.instance(i);      Double classValue = new Double(instance.classValue());      if (classFoldHash.containsKey(classValue)) {	Instances fold = (Instances) classFoldHash.get(classValue);	fold.add(instance);      } else {	// this class has not been seen before, assign to a random fold	int foldIdx = rand.nextInt(numFolds);	Instances fold = (Instances) foldList.get(foldIdx);	fold.add(instance);	classFoldHash.put(classValue, fold);      }    }    return foldList;  }  /** Given a set of instances with the class attribute containing true   * objectID, create a list of *randomly assigned* folds disregarding stratification   *   * @param runInstances a set of instances with class labels   * @param numFolds the number of folds to create   * @return a list of Instances objects, every Instances contains a fold   */  ArrayList createFoldListRandom(Instances runInstances, int 
numFolds) {    ArrayList foldList = new ArrayList(numFolds);    int numInstances = runInstances.numInstances();    Random rand = new Random(numFolds + numInstances);	    // initialize the folds    for (int i=0; i < numFolds; i++) {      Instances fold = new Instances(runInstances, numInstances/numFolds);      foldList.add(fold);    }    // Create all positive pairs and assign them to random folds    // first, hash all classes    HashMap classInstanceListHash = new HashMap();    for (int i=0; i < runInstances.numInstances(); i++) {      Instance instance = runInstances.instance(i);      Double classValue = new Double(instance.classValue());      if (classInstanceListHash.containsKey(classValue)) {	ArrayList instanceList = (ArrayList) classInstanceListHash.get(classValue);	instanceList.add(instance);      } else {	// this class has not been seen before, create a new list for it	ArrayList instanceList = new ArrayList();	instanceList.add(instance);	classInstanceListHash.put(classValue, instanceList);      }    }    int [] foldAssignments = new int[runInstances.numInstances()];    Arrays.fill(foldAssignments, -1);    // go through the classes; each pair gets assigned to a random fold; singletons are also thrown    // into a random fold    Iterator iterator = classInstanceListHash.values().iterator();    while (iterator.hasNext()) {      ArrayList instanceList = (ArrayList) iterator.next();      int classSize = instanceList.size();      if (classSize > 1) {	// go through all pairs and assign both instances of each pair to a random fold	boolean[][] foldInstancesAdded = new boolean[numFolds][classSize];	for (int i=0; i < classSize-1; i++) {	  Instance instance1 = (Instance) instanceList.get(i);	  for (int j=i+1; j < classSize; j++) {	    Instance instance2 = (Instance) instanceList.get(j);	    int foldIdx = rand.nextInt(numFolds);	    Instances fold = (Instances) foldList.get(foldIdx);	    // add the two instances to the random fold unless they have previously been added	  
  if (foldInstancesAdded[foldIdx][i] == false) { 	      fold.add(instance1);	      foldInstancesAdded[foldIdx][i] = true;	    }	    if (foldInstancesAdded[foldIdx][j] == false) {	      fold.add(instance2);	      foldInstancesAdded[foldIdx][j] = true;	    }	  }	}      } else {	// singleton class, assign to a random fold	Instance instance = (Instance) instanceList.get(0);	int foldIdx = rand.nextInt(numFolds);	Instances fold = (Instances) foldList.get(foldIdx);	fold.add(instance);      }     }     return foldList;  }  /** Given a list of folds, merge together all but the test fold with the specified index   * and return the resulting training fold   * @param foldList a list containg folds   * @param testFoldIdx the index of the fold that will be used for testing   * @return an agglomeration of all folds except for the test one.   */  protected Instances getTrainingFold(ArrayList foldList, int testFoldIdx) {    Instances sampleFold = (Instances) foldList.get(0);    Instances trainFold = new Instances(sampleFold, sampleFold.numInstances());    for (int i = 0; i < foldList.size(); i++) {      if (i != testFoldIdx) {	Instances nextFold = (Instances) foldList.get(i);	for (int j = 0; j < nextFold.numInstances(); j++) {	  Instance nextInstance = (Instance) nextFold.instance(j);	  trainFold.add(nextInstance);	}      }    }    return trainFold;  }  /** Given an array containing the overall results of a deduping   * experiment, produce an array containing results for a specific   * recall level   */  protected Object [] processResults(Object[] prResults, double recallLevel) {    double maxPrecision = 0;    double maxFM = 0;     Object[] results = (Object[]) prResults[prResults.length-1];        //    System.out.println(results[1] + "\t" + results[2] + "\t" + results[3]);    for (int i = prResults.length-1; i >= 0; i--) {      Object[] nextResults = (Object[]) prResults[i];      //System.out.println(nextResults[1] + "\t" + nextResults[2] + "\t" + nextResults[3]);      double 
recall = ((Double) nextResults[1]).doubleValue();      if (recall < recallLevel) {	break;      }      double precision = ((Double) nextResults[2]).doubleValue();      if (precision > maxPrecision) {	maxPrecision = precision;	results = nextResults;      }      double fmeasure = ((Double) nextResults[3]).doubleValue();      if (fmeasure > maxFM) {	maxFM = fmeasure;      }    }    Object [] returnResults = new Object [results.length];    System.arraycopy(results, 0, returnResults, 0, results.length);    returnResults[1] = new Double(recallLevel);    returnResults[3] = new Double(maxFM);    return returnResults;  }   /**   * Gets the names of each of the columns produced for a single run.   * This method should really be static.   *   * @return an array containing the name of each column   */  public String [] getKeyNames() {    String [] keyNames = m_splitEvaluator.getKeyNames();    // Add in the names of our extra key fields    int numExtraKeys = 4;    String [] newKeyNames = new String [keyNames.length + numExtraKeys];    newKeyNames[0] = DATASET_FIELD_NAME;    newKeyNames[1] = RUN_FIELD_NAME;    newKeyNames[2] = FOLD_FIELD_NAME;    newKeyNames[3] = RECALL_FIELD_NAME;    System.arraycopy(keyNames, 0, newKeyNames, numExtraKeys, keyNames.length);    return newKeyNames;  }  /**   * Gets the data types of each of the columns produced for a single run.   * This method should really be static.   *   * @return an array containing objects of the type of each column. The    * objects should be Strings, or Doubles.   
*/  public Object [] getKeyTypes() {    Object [] keyTypes = m_splitEvaluator.getKeyTypes();    int numExtraKeys = 4;    // Add in the types of our extra fields    Object [] newKeyTypes = new String [keyTypes.length + numExtraKeys];    newKeyTypes[0] = new String();    newKeyTypes[1] = new String();    newKeyTypes[2] = new String();    newKeyTypes[3] = new String();    System.arraycopy(keyTypes, 0, newKeyTypes, numExtraKeys, keyTypes.length);    return newKeyTypes;  }  /**   * Gets the names of each of the columns produced for a single run.   * This method should really be static.   *   * @return an array containing the name of each column   */  public String [] getResultNames() {    String [] resultNames = m_splitEvaluator.getResultNames();    // Add in the names of our extra Result fields    String [] newResultNames = new String [resultNames.length + 1];    newResultNames[0] = TIMESTAMP_FIELD_NAME;    System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);    return newResultNames;  }  /**   * Gets the data types of each of the columns produced for a single run.   * This method should really be static.   *   * @return an array containing objects of the type of each column. The    * objects should be Strings, or Doubles.   */  public Object [] getResultTypes() {    Object [] resultTypes = m_splitEvaluator.getResultTypes();    // Add in the types of our extra Result fields    Object [] newResultTypes = new Object [resultTypes.length + 1];    newResultTypes[0] = new Double(0);    System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);    return newResultTypes;  }  /**   * Gets a description of the internal settings of the result   * producer, sufficient for distinguishing a ResultProducer   * instance from another with different settings (ignoring   * those settings set through this interface). For example,   * a cross-validation ResultProducer may have a setting for the   * number of folds. 
For a given state, the results produced should   * be compatible. Typically if a ResultProducer is an OptionHandler,   * this string will represent the command line arguments required   * to set the ResultProducer to that state.   *   * @return the description of the ResultProducer state, or null   * if no state is defined   */  public String getCompatibilityState() {    String result = "-X " + m_numFolds + " ";     if (m_splitEvaluator == null) {      result += "<null SplitEvaluator>";    } else {      result += "-W " + m_splitEvaluator.getClass().getName();    }    return result + " --";  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String outputFileTipText() {    return "Set the destination for saving raw output. If the rawOutput "      +"option is selected, then output from the splitEvaluator for "      +"individual folds is saved. If the destination is a directory, "      +"then each output is saved to an individual gzip file; if the "      +"destination is a file, then each output is saved as an entry "      +"in a zip file.";  }  /**   * Get the value of OutputFile.   *   * @return Value of OutputFile.   */  public File getOutputFile() {        return m_outputFile;  }    /**   * Set the value of OutputFile.   *   * @param newOutputFile Value to assign to OutputFile.   */  public void setOutputFile(File newOutputFile) {        m_outputFile = newOutputFile;  }  /** Get the value of separate training file   * @return Value of separate training file.   */
dedupingprcurvecvresultproducersplit.java - 源码说明

本页面展示了「wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器」中的 dedupingprcurvecvresultproducersplit.java 源码文件，采用 Java 编程语言编写，共 1,053 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与university相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?