📄 instances.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习 (semi-supervised learning) 分类器
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
   *   * @param tokenizer the stream tokenizer   * @exception IOException if reading the next token fails   */  private void getFirstToken(StreamTokenizer tokenizer)     throws IOException {        while (tokenizer.nextToken() == StreamTokenizer.TT_EOL){};    if ((tokenizer.ttype == '\'') ||	(tokenizer.ttype == '"')) {      tokenizer.ttype = StreamTokenizer.TT_WORD;    } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&	       (tokenizer.sval.equals("?"))){      tokenizer.ttype = '?';    }  }  /**   * Gets index, checking for a premature and of line.   *   * @param tokenizer the stream tokenizer   * @exception IOException if it finds a premature end of line   */  private void getIndex(StreamTokenizer tokenizer) throws IOException {        if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {      errms(tokenizer,"premature end of line");    }    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {      errms(tokenizer,"premature end of file");    }  }    /**   * Gets token and checks if its end of line.   *   * @param tokenizer the stream tokenizer   * @exception IOException if it doesn't find an end of line   */  private void getLastToken(StreamTokenizer tokenizer, boolean endOfFileOk)        throws IOException {    if ((tokenizer.nextToken() != StreamTokenizer.TT_EOL) &&	((tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk)) {      errms(tokenizer,"end of line expected");    }  }  /**   * Gets next token, checking for a premature and of line.   
*   * @param tokenizer the stream tokenizer   * @exception IOException if it finds a premature end of line   */  private void getNextToken(StreamTokenizer tokenizer)        throws IOException {        if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {      errms(tokenizer,"premature end of line");    }    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {      errms(tokenizer,"premature end of file");    } else if ((tokenizer.ttype == '\'') ||	       (tokenizer.ttype == '"')) {      tokenizer.ttype = StreamTokenizer.TT_WORD;    } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&	       (tokenizer.sval.equals("?"))){      tokenizer.ttype = '?';    }  }	  /**   * Initializes the StreamTokenizer used for reading the ARFF file.   *   * @param tokenizer the stream tokenizer   */  private void initTokenizer(StreamTokenizer tokenizer){    tokenizer.resetSyntax();             tokenizer.whitespaceChars(0, ' ');        tokenizer.wordChars(' '+1,'\u00FF');    tokenizer.whitespaceChars(',',',');    tokenizer.commentChar('%');    tokenizer.quoteChar('"');    tokenizer.quoteChar('\'');    tokenizer.ordinaryChar('{');    tokenizer.ordinaryChar('}');    tokenizer.eolIsSignificant(true);  }   /**   * Returns string including all instances, their weights and   * their indices in the original dataset.   *   * @return description of instance and its weight as a string   */  private String instancesAndWeights(){    StringBuffer text = new StringBuffer();    for (int i = 0; i < numInstances(); i++) {      text.append(instance(i) + " " + instance(i).weight());      if (i < numInstances() - 1) {	text.append("\n");      }    }    return text.toString();  }    /**   * Implements quicksort.   
*   * @param attIndex the attribute's index   * @param lo0 the first index of the subset to be sorted   * @param hi0 the last index of the subset to be sorted   */  private void quickSort(int attIndex, int lo0, int hi0) {        int lo = lo0, hi = hi0;    double mid, midPlus, midMinus;        if (hi0 > lo0) {            // Arbitrarily establishing partition element as the       // midpoint of the array.      mid = instance((lo0 + hi0) / 2).value(attIndex);      midPlus = mid + 1e-6;      midMinus = mid - 1e-6;      // loop through the array until indices cross      while(lo <= hi) {		// find the first element that is greater than or equal to 	// the partition element starting from the left Index.	while ((instance(lo).value(attIndex) < 		midMinus) && (lo < hi0)) {	  ++lo;	}		// find an element that is smaller than or equal to	// the partition element starting from the right Index.	while ((instance(hi).value(attIndex)  > 		midPlus) && (hi > lo0)) {	  --hi;	}		// if the indexes have not crossed, swap	if(lo <= hi) {	  swap(lo,hi);	  ++lo;	  --hi;	}      }            // If the right index has not reached the left side of array      // must now sort the left partition.      if(lo0 < hi) {	quickSort(attIndex,lo0,hi);      }            // If the left index has not reached the right side of array      // must now sort the right partition.      if(lo < hi0) {	quickSort(attIndex,lo,hi0);      }    }  }  /**   * Reads and skips all tokens before next end of line token.   *   * @param tokenizer the stream tokenizer   */  private void readTillEOL(StreamTokenizer tokenizer)        throws IOException {        while (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {};    tokenizer.pushBack();  }  /**   * Help function needed for stratification of set.   
*   * @param numFolds the number of folds for the stratification   */  private void stratStep (int numFolds){        FastVector newVec = new FastVector(m_Instances.capacity());    int start = 0, j;    // create stratified batch    while (newVec.size() < numInstances()) {      j = start;      while (j < numInstances()) {	newVec.addElement(instance(j));	j = j + numFolds;      }      start++;    }    m_Instances = newVec;  }    /**   * Swaps two instances in the set.   *   * @param i the first instance's index   * @param j the second instance's index   */  private void swap(int i, int j){        m_Instances.swap(i, j);  }  /**   * Merges two sets of Instances together. The resulting set will have   * all the attributes of the first set plus all the attributes of the    * second set. The number of instances in both sets must be the same.   *   * @param first the first set of Instances   * @param second the second set of Instances   * @return the merged set of Instances   * @exception IllegalArgumentException if the datasets are not the same size   */  public static Instances mergeInstances(Instances first, Instances second) {    if (first.numInstances() != second.numInstances()) {      throw new IllegalArgumentException("Instance sets must be of the same size");    }    // Create the vector of merged attributes    FastVector newAttributes = new FastVector();    for (int i = 0; i < first.numAttributes(); i++) {      newAttributes.addElement(first.attribute(i));    }    for (int i = 0; i < second.numAttributes(); i++) {      newAttributes.addElement(second.attribute(i));    }        // Create the set of Instances    Instances merged = new Instances(first.relationName() + '_'				     + second.relationName(), 				     newAttributes, 				     first.numInstances());    // Merge each instance    for (int i = 0; i < first.numInstances(); i++) {      merged.add(first.instance(i).mergeInstance(second.instance(i)));    }    return merged;  }  /**   * Initializes the ranges using 
all instances of the dataset.    * Sets m_Ranges.   * @return the ranges     */  public double [][] initializeRanges() {    int numAtt = this.numAttributes();    double [][] ranges = new double [numAtt][3];        if (this.numInstances() <= 0) {      initializeRangesEmpty(numAtt, ranges);      return ranges;    }    else      // initialize ranges using the first instance      updateRangesFirst(this.instance(0), numAtt, ranges);    // update ranges, starting from the second    for (int i = 1; i < this.numInstances(); i++) {      updateRanges(this.instance(i), numAtt, ranges);    }    m_Ranges = ranges;     return ranges;  }  /**   * Initializes the ranges of a subset of the instances of this dataset.   * Therefore m_Ranges is not set.   * @param instList list of indexes of the subset   * @return the ranges   */  public double [][] initializeRanges(int[] instList) {    int numAtt = this.numAttributes();    double [][] ranges = new double [numAtt][3];        if (this.numInstances() <= 0) {      initializeRangesEmpty(numAtt, ranges);      return ranges;    }    else {      // initialize ranges using the first instance      updateRangesFirst(this.instance(instList[0]), numAtt, ranges);      // update ranges, starting from the second      for (int i = 1; i < instList.length; i++) {	updateRanges(this.instance(instList[i]), numAtt, ranges);      }    }    return ranges;  } /**   * Used to initialize the ranges.    * @param numAtt number of attributes in the model   * @param ranges low, high and width values for all attributes   */  public void initializeRangesEmpty(int numAtt,				double[][] ranges) {          for (int j = 0; j < numAtt; j++) {      ranges[j][R_MIN] = Double.MAX_VALUE;      ranges[j][R_MAX] = Double.MIN_VALUE;      ranges[j][R_WIDTH] = Double.MIN_VALUE;     }  }  /**   * Used to initialize the ranges. For this the values of the first    * instance is used to save time.   * Sets low and high to the values of the first instance and   * width to zero.   
* @param instance the new instance   * @param numAtt number of attributes in the model   * @param ranges low, high and width values for all attributes   */  public void updateRangesFirst(Instance instance, int numAtt,				double[][] ranges) {          for (int j = 0; j < numAtt; j++) {      if (!instance.isMissing(j)) {	ranges[j][R_MIN] = instance.value(j);	ranges[j][R_MAX] = instance.value(j);	ranges[j][R_WIDTH] = 0.0;      }       else { // if value was missing	ranges[j][R_MIN] = Double.MIN_VALUE;	ranges[j][R_MAX] = Double.MAX_VALUE;	ranges[j][R_WIDTH] = Double.MAX_VALUE;       }    }  }   /**   * Updates the minimum and maximum and width values for all the attributes   * based on a new instance.   * @param instance the new instance   * @param numAtt number of attributes in the model   * @param ranges low, high and width values for all attributes   */  private void updateRanges(Instance instance, int numAtt,			    double [][] ranges) {          // updateRangesFirst must have been called on ranges    for (int j = 0; j < numAtt; j++) {      double value = instance.value(j);      if (!instance.isMissing(j)) {	if (value < ranges[j][R_MIN]) {	  ranges[j][R_MIN] = value;	  ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];	} else {	  if (instance.value(j) > ranges[j][R_MAX]) {	    ranges[j][R_MAX] = value;	    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];	  }	}      }    }  }  /**   * prints the ranges.   * @param instance the new instance   * @param numAtt number of attributes in the model   * @param ranges low, high and width values for all attributes   */  public static void printRanges(double [][] ranges) {          OOPSS("printRanges");    // updateRangesFirst must have been called on ranges    for (int j = 0; j < ranges.length; j++) {      OOPSS(" "+j+"-MIN "+ranges[j][R_MIN]);      OOPSS(" "+j+"-MAX "+ranges[j][R_MAX]);      OOPSS(" "+j+"-WIDTH "+ranges[j][R_WIDTH]);    }  }  /**   * Updates the ranges given a new instance.   
* @param instance the new instance   * @param numAtt number of attributes in the model   * @param ranges low, high and width values for all attributes   *  public void updateRanges(Instance instance, double [][] ranges) {          int numAtt = numAttributes();    // updateRangesFirst must have been called on ranges    for (int j = 0; j < numAtt; j++) {      double value = instance.value(j);      if (!instance.isMissing(j)) {	if (value < ranges[j][R_MIN]) {	  ranges[j][R_MIN] = value;	  ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];	} else {	  if (instance.value(j) > ranges[j][R_MAX]) {	    ranges[j][R_MAX] = value;	    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];	  }	}      }    }  }*/  /**   * Updates the ranges given a new instance.   * @param instance the new instance   * @param ranges low, high and width values for all attributes   */  public static double [][] updateRanges(Instance instance, 					 double [][] ranges) {      // updateRangesFirst must have been called on ranges    for (int j = 0; j < ranges.length; j++) {      double value = instance.value(j);      if (!instance.isMissing(j)) {	if (value < ranges[j][R_MIN]) {	  ranges[j][R_MIN] = value;	  ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];	} else {	  if (instance.value(j) > ranges[j][R_MAX]) {	    ranges[j][R_MAX] = value;	    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];	  }	}      }    }    return ranges;  }  /**   * Test if an instance is within the giv
上一页 1 2 3 4 5
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -