📄 instances.java

📁 矩阵的QR分解算法
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
      offset = numInstances() % numFolds;    test = new Instances(this, numInstForFold);    first = numFold * (numInstances() / numFolds) + offset;    copyInstances(first, test, numInstForFold);    return test;  }   /**   * Returns the dataset as a string in ARFF format. Strings   * are quoted if they contain whitespace characters, or if they   * are a question mark.   *   * @return the dataset in ARFF format as a string   */  public String toString() {        StringBuffer text = new StringBuffer();        text.append(ARFF_RELATION).append(" ").      append(Utils.quote(m_RelationName)).append("\n\n");    for (int i = 0; i < numAttributes(); i++) {      text.append(attribute(i)).append("\n");    }    text.append("\n").append(ARFF_DATA).append("\n");    text.append(stringWithoutHeader());    return text.toString();  }  /**   * Returns the instances in the dataset as a string in ARFF format. Strings   * are quoted if they contain whitespace characters, or if they   * are a question mark.   *   * @return the dataset in ARFF format as a string   */  protected String stringWithoutHeader() {        StringBuffer text = new StringBuffer();    for (int i = 0; i < numInstances(); i++) {      text.append(instance(i));      if (i < numInstances() - 1) {	text.append('\n');      }    }    return text.toString();  }  /**   * Creates the training set for one fold of a cross-validation    * on the dataset.    *   * @param numFolds the number of folds in the cross-validation. Must   * be greater than 1.   * @param numFold 0 for the first fold, 1 for the second, ...   * @return the training set    * @throws IllegalArgumentException if the number of folds is less than 2   * or greater than the number of instances.   */  //@ requires 2 <= numFolds && numFolds < numInstances();  //@ requires 0 <= numFold && numFold < numFolds;  public Instances trainCV(int numFolds, int numFold) {    int numInstForFold, first, offset;    Instances train;     if (numFolds < 2) {      throw new IllegalArgumentException("Number of folds must be at least 2!");    }    if (numFolds > numInstances()) {      throw new IllegalArgumentException("Can't have more folds than instances!");    }    numInstForFold = numInstances() / numFolds;    if (numFold < numInstances() % numFolds) {      numInstForFold++;      offset = numFold;    }else      offset = numInstances() % numFolds;    train = new Instances(this, numInstances() - numInstForFold);    first = numFold * (numInstances() / numFolds) + offset;    copyInstances(0, train, first);    copyInstances(first + numInstForFold, train,		  numInstances() - first - numInstForFold);    return train;  }  /**   * Creates the training set for one fold of a cross-validation    * on the dataset. The data is subsequently randomized based   * on the given random number generator.   *   * @param numFolds the number of folds in the cross-validation. Must   * be greater than 1.   * @param numFold 0 for the first fold, 1 for the second, ...   * @param random the random number generator   * @return the training set    * @throws IllegalArgumentException if the number of folds is less than 2   * or greater than the number of instances.   */  //@ requires 2 <= numFolds && numFolds < numInstances();  //@ requires 0 <= numFold && numFold < numFolds;  public Instances trainCV(int numFolds, int numFold, Random random) {    Instances train = trainCV(numFolds, numFold);    train.randomize(random);    return train;  }  /**   * Computes the variance for a numeric attribute.   *   * @param attIndex the numeric attribute (index starts with 0)   * @return the variance if the attribute is numeric   * @throws IllegalArgumentException if the attribute is not numeric   */  public /*@pure@*/ double variance(int attIndex) {      double sum = 0, sumSquared = 0, sumOfWeights = 0;    if (!attribute(attIndex).isNumeric()) {      throw new IllegalArgumentException("Can't compute variance because attribute is " +			  "not numeric!");    }    for (int i = 0; i < numInstances(); i++) {      if (!instance(i).isMissing(attIndex)) {	sum += instance(i).weight() * 	  instance(i).value(attIndex);	sumSquared += instance(i).weight() * 	  instance(i).value(attIndex) *	  instance(i).value(attIndex);	sumOfWeights += instance(i).weight();      }    }    if (sumOfWeights <= 1) {      return 0;    }    double result = (sumSquared - (sum * sum / sumOfWeights)) /       (sumOfWeights - 1);    // We don't like negative variance    if (result < 0) {      return 0;    } else {      return result;    }  }  /**   * Computes the variance for a numeric attribute.   *   * @param att the numeric attribute   * @return the variance if the attribute is numeric   * @throws IllegalArgumentException if the attribute is not numeric   */  public /*@pure@*/ double variance(Attribute att) {        return variance(att.index());  }    /**   * Calculates summary statistics on the values that appear in this   * set of instances for a specified attribute.   *   * @param index the index of the attribute to summarize (index starts with 0)   * @return an AttributeStats object with it's fields calculated.   */  //@ requires 0 <= index && index < numAttributes();  public AttributeStats attributeStats(int index) {    AttributeStats result = new AttributeStats();    if (attribute(index).isNominal()) {      result.nominalCounts = new int [attribute(index).numValues()];    }    if (attribute(index).isNumeric()) {      result.numericStats = new weka.experiment.Stats();    }    result.totalCount = numInstances();    double [] attVals = attributeToDoubleArray(index);    int [] sorted = Utils.sort(attVals);    int currentCount = 0;    double prev = Instance.missingValue();    for (int j = 0; j < numInstances(); j++) {      Instance current = instance(sorted[j]);      if (current.isMissing(index)) {	result.missingCount = numInstances() - j;	break;      }      if (current.value(index) == prev) {	currentCount++;      } else {	result.addDistinct(prev, currentCount);	currentCount = 1;	prev = current.value(index);      }    }    result.addDistinct(prev, currentCount);    result.distinctCount--; // So we don't count "missing" as a value     return result;  }    /**   * Gets the value of all instances in this dataset for a particular   * attribute. Useful in conjunction with Utils.sort to allow iterating   * through the dataset in sorted order for some attribute.   *   * @param index the index of the attribute.   * @return an array containing the value of the desired attribute for   * each instance in the dataset.    */  //@ requires 0 <= index && index < numAttributes();  public /*@pure@*/ double [] attributeToDoubleArray(int index) {    double [] result = new double[numInstances()];    for (int i = 0; i < result.length; i++) {      result[i] = instance(i).value(index);    }    return result;  }  /**   * Generates a string summarizing the set of instances. Gives a breakdown   * for each attribute indicating the number of missing/discrete/unique   * values and other information.   *   * @return a string summarizing the dataset   */  public String toSummaryString() {    StringBuffer result = new StringBuffer();    result.append("Relation Name:  ").append(relationName()).append('\n');    result.append("Num Instances:  ").append(numInstances()).append('\n');    result.append("Num Attributes: ").append(numAttributes()).append('\n');    result.append('\n');    result.append(Utils.padLeft("", 5)).append(Utils.padRight("Name", 25));    result.append(Utils.padLeft("Type", 5)).append(Utils.padLeft("Nom", 5));    result.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));    result.append(Utils.padLeft("Missing", 12));    result.append(Utils.padLeft("Unique", 12));    result.append(Utils.padLeft("Dist", 6)).append('\n');    for (int i = 0; i < numAttributes(); i++) {      Attribute a = attribute(i);      AttributeStats as = attributeStats(i);      result.append(Utils.padLeft("" + (i + 1), 4)).append(' ');      result.append(Utils.padRight(a.name(), 25)).append(' ');      long percent;      switch (a.type()) {      case Attribute.NOMINAL:	result.append(Utils.padLeft("Nom", 4)).append(' ');	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      case Attribute.NUMERIC:	result.append(Utils.padLeft("Num", 4)).append(' ');	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      case Attribute.DATE:	result.append(Utils.padLeft("Dat", 4)).append(' ');	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      case Attribute.STRING:	result.append(Utils.padLeft("Str", 4)).append(' ');	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      case Attribute.RELATIONAL:	result.append(Utils.padLeft("Rel", 4)).append(' ');	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      default:	result.append(Utils.padLeft("???", 4)).append(' ');	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      }      result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /");      percent = Math.round(100.0 * as.missingCount / as.totalCount);      result.append(Utils.padLeft("" + percent, 3)).append("% ");      result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /");      percent = Math.round(100.0 * as.uniqueCount / as.totalCount);      result.append(Utils.padLeft("" + percent, 3)).append("% ");      result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' ');      result.append('\n');    }    return result.toString();  }  /**   * Copies instances from one set to the end of another    * one.   *   * @param from the position of the first instance to be copied   * @param dest the destination for the instances   * @param num the number of instances to be copied   */  //@ requires 0 <= from && from <= numInstances() - num;  //@ requires 0 <= num;  protected void copyInstances(int from, /*@non_null@*/ Instances dest, int num) {        for (int i = 0; i < num; i++) {      dest.add(instance(from + i));    }  }    /**   * Replaces the attribute information by a clone of   * itself.   */  protected void freshAttributeInfo() {    m_Attributes = (FastVector) m_Attributes.copyElements();  }   /**   * Returns string including all instances, their weights and   * their indices in the original dataset.   *   * @return description of instance and its weight as a string   */  protected /*@pure@*/ String instancesAndWeights(){    StringBuffer text = new StringBuffer();    for (int i = 0; i < numInstances(); i++) {      text.append(instance(i) + " " + instance(i).weight());      if (i < numInstances() - 1) {	text.append("\n");      }    }    return text.toString();  }    /**   * Partitions the instances around a pivot. Used by quicksort and   * kthSmallestValue.   *   * @param attIndex the attribute's index (index starts with 0)   * @param l the first index of the subset (index starts with 0)   * @param r the last index of the subset (index starts with 0)   *   * @return the index of the middle element   */  //@ requires 0 <= attIndex && attIndex < numAttributes();  //@ requires 0 <= left && left <= right && right < numInstances();  protected int partition(int attIndex, int l, int r) {        double pivot = instance((l + r) / 2).value(attIndex);    while (l < r) {      while ((instance(l).value(attIndex) < pivot) && (l < r)) {        l++;      }      while ((instance(r).value(attIndex) > pivot) && (l < r)) {        r--;      }      if (l < r) {        swap(l, r);        l++;        r--;      }    }    if ((l == r) && (instance(r).value(attIndex) > pivot)) {      r--;    }     return r;  }    /**   * Implements quicksort according to Manber's "Introduction to   * Algorithms".   *   * @param attIndex the attribute's index (index starts with 0)   * @param left the first index of the subset to be sorted (index starts with 0)   * @param right the last index of the subset to be sorted (index starts with 0)   */  //@ requires 0 <= attIndex && attIndex < numAttributes();  //@ requires 0 <= first && first <= right && right < numInstances();  protected void quickSort(int attIndex, int left, int right) {    if (left < right) {      int middle = partition(attIndex, left, right);      quickSort(attIndex, left, middle);      quickSort(attIndex, middle + 1, right);    }  }
💿 文件大小 531 K
👤 上传用户 bobey
📂 所属分类数学计算
🏷️ 相关标签

#矩阵 #分解 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -