⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instances.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
   * @param random a random number generator
   */
  public final void randomize(Random random) {

    // Fisher-Yates shuffle, walking from the last position down to 1.
    // The swap partner has to be drawn from 0..j *inclusive*. Drawing it
    // from 0..j-1 forces every instance to leave its position, which limits
    // the outcome to cyclic permutations and makes the shuffle non-uniform.
    // nextDouble() is kept as the source of randomness (one draw per step),
    // in line with how resample() picks its indices.
    for (int j = numInstances() - 1; j > 0; j--) {
      swap(j, (int) (random.nextDouble() * (double) (j + 1)));
    }
  }

  /**
   * Reads a single instance from the reader and appends it
   * to the dataset.  Automatically expands the dataset if it
   * is not large enough to hold the instance. This method does
   * not check for carriage return at the end of the line.
   *
   * @param reader the reader
   * @return false if end of file has been reached
   * @exception IOException if the information is not read
   * successfully
   */
  public final boolean readInstance(Reader reader)
       throws IOException {

    // A new tokenizer is wrapped around the reader on every call and
    // configured by initTokenizer before any token is consumed.
    final StreamTokenizer arffTokens = new StreamTokenizer(reader);
    initTokenizer(arffTokens);

    // The literal false is presumably the switch that turns off the
    // end-of-line check mentioned in the Javadoc -- TODO confirm against
    // getInstance.
    final boolean instanceRead = getInstance(arffTokens, false);
    return instanceRead;
  }

  /**
   * Returns the relation's name.
   *
   * @return the relation's name as a string
   */
  public final String relationName() {

    // Plain accessor: the stored name is handed back unchanged.
    final String currentName = m_RelationName;
    return currentName;
  }

  /**
   * Renames an attribute. This change only affects this
   * dataset.
   *
   * @param att the attribute's index
   * @param name the new name
   */
  public final void renameAttribute(int att, String name) {

    // Work on a renamed copy; the attribute object currently stored at this
    // position is not modified.
    final Attribute renamed = attribute(att).copy(name);
    final int attCount = numAttributes();
    final FastVector rebuilt = new FastVector(attCount);

    // Rebuild the attribute list, substituting the copy at position att and
    // reusing every other attribute object as is.
    for (int pos = 0; pos < attCount; pos++) {
      rebuilt.addElement(pos == att ? renamed : attribute(pos));
    }

    // Swap in the new list only once it is complete.
    m_Attributes = rebuilt;
  }

  /**
   * Renames an attribute. This change only affects this
   * dataset.
   *
   * @param att the attribute
   * @param name the new name
   */
  public final void renameAttribute(Attribute att, String name) {

    // Delegates to the index-based overload using the attribute's own index.
    final int position = att.index();
    renameAttribute(position, name);
  }

  /**
   * Renames a value of a nominal (or string) attribute. This
   * change only affects this dataset.
   *
   * @param att the attribute's index
   * @param val the value's index
   * @param name the new name
   * @exception Exception if renaming fails
   */
  public final void renameAttributeValue(int att, int val, String name)
    throws Exception {

    // Relabel a private copy of the attribute so the attribute object
    // currently stored at this position is not modified.
    final Attribute relabelled = (Attribute) attribute(att).copy();
    relabelled.setValue(val, name);

    // Rebuild the attribute list with the copy substituted at position att.
    final int attCount = numAttributes();
    final FastVector rebuilt = new FastVector(attCount);
    for (int pos = 0; pos < attCount; pos++) {
      rebuilt.addElement(pos == att ? relabelled : attribute(pos));
    }
    m_Attributes = rebuilt;
  }

  /**
   * Renames a value of a nominal (or string) attribute. This
   * change only affects this dataset.
   *
   * @param att the attribute
   * @param val the value
   * @param name the new name
   * @exception Exception if renaming fails
   */
  public final void renameAttributeValue(Attribute att, String val, String name)
    throws Exception {

    final int attIndex = att.index();
    final int valIndex = att.indexOfValue(val);

    // A negative index can never address a label. indexOfValue presumably
    // signals "label not found" that way -- TODO confirm. Fail here with a
    // descriptive message instead of passing the bad index on to the
    // index-based overload.
    if (valIndex < 0) {
      throw new Exception("Value '" + val
                          + "' is not a value of the given attribute!");
    }
    renameAttributeValue(attIndex, valIndex, name);
  }

  /**
   * Creates a new dataset of the same size using random sampling
   * with replacement.
   *
   * @param random a random number generator
   * @return the new dataset
   */
  public final Instances resample(Random random) {

    final int size = numInstances();
    final Instances sample = new Instances(this, size);

    // Keep drawing until the sample holds as many instances as this
    // dataset. Each draw maps a value from [0, 1) onto an instance index,
    // so the same instance may be picked repeatedly.
    while (sample.numInstances() < size) {
      final double fraction = random.nextDouble();
      sample.add(instance((int) (fraction * (double) size)));
    }
    return sample;
  }

  /**
   * Creates a new dataset of the same size using random sampling
   * with replacement according to the current instance weights. The
   * weights of the instances in the new dataset are set to one.
   *
   * @param random a random number generator
   * @return the new dataset
   * @exception Exception if something goes wrong
   */
  public final Instances resampleWithWeights(Random random)
    throws Exception {

    final int size = numInstances();
    final double[] instanceWeights = new double[size];

    // Collect the current weights and note whether any of them differs
    // from the first one (compared through Utils.eq).
    boolean weightsDiffer = false;
    for (int idx = 0; idx < size; idx++) {
      instanceWeights[idx] = instance(idx).weight();
      weightsDiffer |= !Utils.eq(instanceWeights[idx], instanceWeights[0]);
    }

    // When all weights are equal no sampling takes place at all: the result
    // is built with new Instances(this) and the random generator is not
    // consulted.
    if (!weightsDiffer) {
      return new Instances(this);
    }
    return resampleWithWeights(random, instanceWeights);
  }


  /**
   * Creates a new dataset of the same size using random sampling
   * with replacement according to the given weight vector. The
   * weights of the instances in the new dataset are set to one.
   * The length of the weight vector has to be the same as the
   * number of instances in the dataset, and all weights have to
   * be positive.
   *
   * @param random a random number generator
   * @param weights the weight vector
   * @return the new dataset
   * @exception Exception if something goes wrong
   */
  public final Instances resampleWithWeights(Random random,
                                             double[] weights)
    throws Exception {

    // One weight per instance is required.
    if (weights.length != numInstances()) {
      throw new Exception("Length of weight vector incompatible.");
    }
    Instances newData = new Instances(this, numInstances());

    // Nothing to sample from. Without this guard the rounding fix-up below
    // would write to index -1 of an empty array.
    if (numInstances() == 0) {
      return newData;
    }

    double[] probabilities = new double[numInstances()];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);

    // Draw one random number per instance and store the running total,
    // which gives a non-decreasing sequence of sample points.
    for (int i = 0; i < numInstances(); i++) {
      sumProbs += random.nextDouble();
      probabilities[i] = sumProbs;
    }

    // Presumably divides every entry by the second argument, rescaling the
    // sample points so the last one lands on sumOfWeights -- TODO confirm
    // against Utils.normalize.
    Utils.normalize(probabilities, sumProbs / sumOfWeights);

    // Make sure that rounding errors don't mess things up
    probabilities[numInstances() - 1] = sumOfWeights;

    // Walk the instances (l) and the sample points (k) in parallel. sumProbs
    // is reused as the cumulative weight of instances 0..l. Every sample
    // point that does not exceed it selects instance l.
    int k = 0; int l = 0;
    sumProbs = 0;
    while ((k < numInstances() && (l < numInstances()))) {
      // NOTE(review): the check is lazy (only weights reached by the walk
      // are inspected) and lets zero through, although the message says
      // "positive".
      if (weights[l] < 0) {
        throw new Exception("Weights have to be positive.");
      }
      sumProbs += weights[l];
      while ((k < numInstances()) &&
             (probabilities[k] <= sumProbs)) {
        newData.add(instance(l));
        // The element just added sits at position k of the new dataset.
        newData.instance(k).setWeight(1);
        k++;
      }
      l++;
    }
    return newData;
  }

  /**
   * Sets the class attribute.
   *
   * @param att attribute to be the class
   */
  public final void setClass(Attribute att) {

    // Only the attribute's index is recorded; the attribute object itself
    // is not stored and no range check is applied.
    final int position = att.index();
    m_ClassIndex = position;
  }

  /**
   * Sets the class index of the set.
   * If the class index is negative, the dataset is assumed to have no
   * class (i.e. the class is undefined).
   *
   * @param classIndex the new class index
   * @exception Exception if the class index is too big
   */
  public final void setClassIndex(int classIndex) throws Exception {

    // Only the upper bound is enforced: any negative value is accepted and
    // stored as is.
    if (classIndex >= numAttributes()) {
      // The message names the offending index and the attribute count so
      // the caller can see why the index was rejected.
      throw new Exception("Class index too large: " + classIndex
                          + " (number of attributes: " + numAttributes()
                          + ")!");
    }
    m_ClassIndex = classIndex;
  }

  /**
   * Sets the relation's name.
   *
   * @param newName the new relation name.
   */
  public final void setRelationName(String newName) {

    // Stored exactly as given: no null check, trimming or quoting here.
    this.m_RelationName = newName;
  }

  /**
   * Sorts the instances based on an attribute. For numeric attributes,
   * instances are sorted in ascending order. For nominal attributes,
   * instances are sorted based on the attribute label ordering
   * specified in the header. Instances with missing values for the
   * attribute are placed at the end of the dataset.
   *
   * @param attIndex the attribute's index
   */
  public final void sort(int attIndex) {

    // Phase 1: partition. 'front' scans upwards and 'back' marks the last
    // position not yet known to hold a missing value. When the loop ends,
    // every instance after 'back' is missing the attribute.
    int front = 0;
    int back = numInstances() - 1;
    while (front <= back) {
      if (instance(back).isMissing(attIndex)) {
        // Already in the missing region: just shrink the active range.
        back--;
        continue;
      }
      if (instance(front).isMissing(attIndex)) {
        // Exchange with the known non-missing instance at 'back'.
        swap(front, back);
        back--;
      }
      front++;
    }

    // Phase 2: order the non-missing prefix only.
    quickSort(attIndex, 0, back);
  }

  /**
   * Sorts the instances based on an attribute. For numeric attributes,
   * instances are sorted into ascending order. For nominal attributes,
   * instances are sorted based on the attribute label ordering
   * specified in the header. Instances with missing values for the
   * attribute are placed at the end of the dataset.
   *
   * @param att the attribute
   */
  public final void sort(Attribute att) {

    // Delegates to the index-based overload using the attribute's own index.
    final int position = att.index();
    sort(position);
  }

  /**
   * Stratifies a set of instances according to its class values
   * if the class attribute is nominal (so that afterwards a
   * stratified cross-validation can be performed).
   *
   * @param numFolds the number of folds in the cross-validation
   * @exception Exception if the class is not set
   */
  public final void stratify(int numFolds) throws Exception {

    if (m_ClassIndex < 0) {
      throw new Exception("Class index is negative (not set)!");
    }
    // A class attribute that is not nominal leaves the dataset untouched.
    if (!classAttribute().isNominal()) {
      return;
    }

    // Group instances class by class. 'boundary' is the first position
    // that has not yet been assigned to a group, and the instance just
    // before it is the representative of the group being collected.
    int boundary = 1;
    while (boundary < numInstances()) {
      final Instance anchor = instance(boundary - 1);
      for (int scan = boundary; scan < numInstances(); scan++) {
        final Instance candidate = instance(scan);
        // Two instances that both lack a class value are treated as the
        // same group, in addition to those with equal class values.
        final boolean sameGroup =
          (anchor.classValue() == candidate.classValue())
          || (anchor.classIsMissing() && candidate.classIsMissing());
        if (sameGroup) {
          swap(boundary, scan);
          boundary++;
        }
      }
      // Step past the first member of the next group.
      boundary++;
    }
    stratStep(numFolds);
  }

  /**
   * Computes the sum of all the instances' weights.
   *
   * @return the sum of all the instances' weights as a double
   */
  public final double sumOfWeights() {

    // Accumulate in index order so the floating-point result is
    // reproducible.
    double total = 0;
    int idx = 0;
    while (idx < numInstances()) {
      total += instance(idx).weight();
      idx++;
    }
    return total;
  }

  /**
   * Creates the test set for one fold of a cross-validation on
   * the dataset.
   *
   * @param numFolds the number of folds in the cross-validation. Must
   * be greater than 1.
   * @param numFold 0 for the first fold, 1 for the second, ...
   * @return the test set as a set of weighted instances
   * @exception Exception if dataset can't be generated
   * successfully
   */
  public Instances testCV(int numFolds, int numFold)
       throws Exception {

    if (numFolds < 2) {
      throw new Exception("Number of folds must be at least 2!");
    }
    final int total = numInstances();
    if (numFolds > total) {
      throw new Exception("Can't have more folds than instances!");
    }

    // Every fold gets 'baseSize' instances; the first 'leftover' folds get
    // one extra so that all instances are used.
    final int baseSize = total / numFolds;
    final int leftover = total % numFolds;
    final int foldSize = (numFold < leftover) ? baseSize + 1 : baseSize;

    // Start of this fold: skip the base share of all earlier folds plus one
    // extra instance for each earlier fold that received one.
    final int start = numFold * baseSize + Math.min(numFold, leftover);

    final Instances fold = new Instances(this, foldSize);
    copyInstances(start, fold, foldSize);
    return fold;
  }

  /**
   * Returns the dataset as a string in ARFF format. Strings
   * are quoted if they contain whitespace characters, or if they
   * are a question mark.
   *
   * @return the dataset in ARFF format as a string
   */
  public final String toString() {

    final StringBuffer arff = new StringBuffer();

    // Header: relation line, a blank line, then one line per attribute.
    arff.append("@relation ").append(Utils.quote(m_RelationName)).append("\n\n");
    final int attCount = numAttributes();
    for (int a = 0; a < attCount; a++) {
      arff.append(attribute(a)).append("\n");
    }

    // Data section: instances are separated by newlines, and the last
    // instance is not followed by one.
    arff.append("\n@data\n");
    final int rowCount = numInstances();
    for (int row = 0; row < rowCount; row++) {
      if (row > 0) {
        arff.append('\n');
      }
      arff.append(instance(row));
    }
    return arff.toString();
  }

  /**
   * Creates the training set for one fold of a cross-validation
   * on the dataset.
   *
   * @param numFolds the number of folds in the cross-validation. Must
   * be greater than 1.
   * @param numFold 0 for the first fold, 1 for the second, ...
   * @return the training set as a set of weighted
   * instances
   * @exception Exception if dataset can't be generated
   * successfully
   */
  public Instances trainCV(int numFolds, int numFold)
       throws Exception {

    int numInstForFold, first, offset;
    Instances train;

    if (numFolds < 2) {
      throw new Exception("Number of folds must be at least 2!");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -