⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dataset.java

📁 一个决策树的Applet(转载)
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
  /**
   * Provides access to the examples currently held in the
   * testing set of this dataset.
   *
   * @return An iterator obtained from the testing set.
   */
  public Iterator getTestingExamples()
  {
    Iterator testingExamples = m_testingSet.iterator();

    return testingExamples;
  }

  /**
   * Transfers a single example out of the training set and
   * appends it to the testing set.  An example number that
   * does not index an existing training example is ignored.
   *
   * @param exampleNum Position, within the training set, of the
   *        example to transfer to the testing set.
   */
  public void moveToTestingSet( int exampleNum )
  {
    boolean inRange =
      exampleNum >= 0 && exampleNum < m_trainingSet.size();

    if( inRange ) {
      // Remove from the training set first, then append the
      // removed example to the testing set.
      m_testingSet.add( m_trainingSet.remove( exampleNum ) );
    }
  }

  /**
   * Moves an example from the testing set to
   * the training set.  If the supplied example number
   * is out of range no example is moved.
   *
   * @param exampleNum Position, within the testing set, of the
   *        example to transfer to the training set.
   */
  public void moveToTrainingSet( int exampleNum )
  {
    // The index refers to the testing set, so validate it
    // against the size of that set.
    if( exampleNum < 0 || exampleNum >= m_testingSet.size() )
      return;

    m_trainingSet.add( m_testingSet.remove( exampleNum ) );
  }

  /**
   * Looks up one example in the training set by position.
   *
   * @param exampleNum Position of the wanted example within
   *        the training set.
   *
   * @return The training example stored at that position,
   *         as an integer array.
   *
   * @throws IndexOutOfBoundsException If the example
   *         number is negative, or is not smaller than
   *         the number of training examples in the dataset.
   */
  public int[] getTrainingExample( int exampleNum )
  {
    boolean exists =
      exampleNum >= 0 && exampleNum < m_trainingSet.size();

    if( !exists ) {
      String message =
        "Example number " + exampleNum + " does not exist.";

      throw new IndexOutOfBoundsException( message );
    }

    return (int[])m_trainingSet.elementAt( exampleNum );
  }

  /**
   * Looks up one example in the testing set by position.
   *
   * @param exampleNum Position of the wanted example within
   *        the testing set.
   *
   * @return The testing example stored at that position,
   *         as an integer array.
   *
   * @throws IndexOutOfBoundsException If the example
   *         number is negative, or is not smaller than
   *         the number of testing examples in the dataset.
   */
  public int[] getTestingExample( int exampleNum )
  {
    boolean exists =
      exampleNum >= 0 && exampleNum < m_testingSet.size();

    if( !exists ) {
      String message =
        "Example number " + exampleNum + " does not exist.";

      throw new IndexOutOfBoundsException( message );
    }

    return (int[])m_testingSet.elementAt( exampleNum );
  }

  /**
   * Creates a random testing dataset.  Calling this
   * method will destroy any previously built testing set:
   * every example currently in the testing set is first
   * returned to the training set.
   *
   * @param percentage Percentage of the entire dataset to
   *        use for testing.
   *
   * @param balanced <code>true</code> to create a balanced
   *        testing set, where the testing set and the
   *        remaining training set have approximately the same
   *        proportion of each class.  The quota for each class
   *        is rounded independently, so the final testing set
   *        size can differ slightly from the requested size.
   *
   * @throws IllegalArgumentException If the percentage value
   *         is less than 0 or greater than 100.
   */
  public void createRndTestSet( int percentage, boolean balanced )
  {
    if( percentage < 0 || percentage > 100 )
      throw new
        IllegalArgumentException( "Percentage value out of range." );

    // Move any examples that are part of the current testing
    // set back to the training set.
    m_trainingSet.addAll( m_testingSet );
    m_testingSet.clear();

    // Calculate the number of examples that should be
    // in the testing set.
    int totalNumExamples = m_trainingSet.size();
    int numTestingExamples =
      Math.round( totalNumExamples * ((float)percentage)/100.0f );
    Random rand = new Random();

    // If the set doesn't have to be balanced, then just
    // pick examples at random.
    if( !balanced ) {
      for( int i = 0;
           i < numTestingExamples && !m_trainingSet.isEmpty();
           i++ ) {
        m_testingSet.add(
          m_trainingSet.remove( rand.nextInt( m_trainingSet.size() ) ) );
      }

      return;
    }

    // Balanced case: use the recorded target value distribution
    // to work out how many examples of each class to move.
    for( int i = 0; i < m_targetSums.length; i++ ) {
      int numExamplesToMove =
        Math.round( m_targetSums[i] / ((float)totalNumExamples)
                    * numTestingExamples );

      // Gather every remaining training example whose target
      // value (stored at index 0 of the example) is class i.
      // Picking from this list, instead of repeatedly sampling
      // the whole training set until a match turns up, always
      // terminates, even if the recorded class counts do not
      // match the actual contents of the training set.
      Vector candidates = new Vector();

      for( int k = 0; k < m_trainingSet.size(); k++ ) {
        int[] candidate = (int[])m_trainingSet.get( k );

        if( candidate[0] == i ) candidates.add( candidate );
      }

      for( int j = 0;
           j < numExamplesToMove && !candidates.isEmpty();
           j++ ) {
        int[] example = (int[])
          candidates.remove( rand.nextInt( candidates.size() ) );

        // Arrays compare by identity, so this removes exactly
        // the chosen example from the training set.
        m_trainingSet.remove( example );
        m_testingSet.add( example );
      }
    }
  }

  // Private methods

  /**
   * Parses a decision tree configuration (metadata) file.
   * The target attribute is read first and appended to the
   * attributes vector, followed by every general attribute
   * in the order the parser returns them.
   *
   * @param  metaInputStream An input stream attached
   *         to the metadata file.
   *
   * @return A String containing the name of the ID3
   *         data file to open and read.
   *
   * @throws InvalidMetaFileException If a
   *         syntax error is encountered during the
   *         parsing process.
   *
   * @throws IOException If a problem occurs while
   *         reading the configuration file.
   */
  private String parseMetaFile( InputStream metaInputStream )
    throws InvalidMetaFileException,
           IOException
  {
    String buf; // temporary buffer

    FileParser parser = new FileParser();

    // 1. Open the metadata file and read in (ignoring comments).
    parser.startMetaParse( metaInputStream );

    // 2. Extract the target attribute name.
    parser.moveToTargetAttribute();

    // NOTE(review): the explicit String copies in this method look
    // redundant on current JVMs; they are kept so that behavior is
    // unchanged - confirm before removing.
    String targetName   = new String( parser.extractAttributeName() );
    Vector targetValues = new Vector();
    int numTargetValues;

    // 3. Loop, extracting the possible values for the target attribute.
    while( (buf = parser.extractAttributeValue()) != null )
      targetValues.add( new String( buf ) );

    // Build the target attribute and add it to the attributes vector.
    // NOTE(review): the meaning of the third constructor argument
    // (1 here, numTargetValues for general attributes) is defined by
    // the Attribute class, which is not visible from this method.
    Attribute targetAttribute = new Attribute( targetName, targetValues, 1 );

    numTargetValues = targetValues.size();
    m_attributes.add( targetAttribute );

    // 4. Get ready to extract general attribute information.
    parser.moveToAttributes();

    // 5. Loop, extracting each attribute one after the other.
    while( (buf = parser.extractAttributeName()) != null ) {
      String attName   = new String( buf );
      Vector attValues = new Vector();

      // 5.a. Loop, extracting the possible values for this
      //      attribute.
      while( (buf = parser.extractAttributeValue()) != null )
        attValues.add( new String( buf ) );

      Attribute nextAtt = new Attribute( attName, attValues, numTargetValues );
      m_attributes.add( nextAtt );
    }

    // 6. Extract the name of the data file.
    parser.moveToDataFilePath();

    String dataFilePath = parser.extractString();

    //------------------------- DEBUG -------------------------
    if( DEBUG_ON )
    {
      System.out.println( "Dataset::parseMetaFile: " +
        "Finished parsing configuration file." );
      System.out.println();

      // Target attribute information.
      System.out.println( "Target Attribute" );
      System.out.println( "------------------------------" );
      System.out.println();
      System.out.println( "Target Attribute Name:  " +
        ((Attribute)m_attributes.elementAt(0)).getName() );
      System.out.println( "Number of Values:       " +
        ((Attribute)m_attributes.elementAt(0)).getNumValues() );
      System.out.println( "Possible Values:        " +
        ((Attribute)m_attributes.elementAt(0)).getValueNames() );
      System.out.println();

      // General attribute information.
      System.out.println( "Attributes" );
      System.out.println( "------------------------------" );
      System.out.println();
      System.out.println( "Number of Attributes:   " +
        (getNumAttributes() - 1) );
      System.out.println();

      for( int i = 1; i < getNumAttributes(); i++ ) {
        Attribute currAtt = null;

        try {
          currAtt = getAttributeByNum( i );
        }
        catch( NonexistentAttributeException e ) {
          // Not expected for an index below getNumAttributes().
          // If the lookup fails anyway, report it and skip this
          // entry instead of dereferencing a null attribute.
          System.out.println( "Attribute " + i +
            " could not be retrieved: " + e );
          System.out.println();
          continue;
        }

        System.out.println( "Attribute " + i + " Name:       " +
          currAtt.getName() );
        System.out.println( "Number of Values:       " +
          currAtt.getNumValues() );
        System.out.println( "Possible Values:        " +
          currAtt.getValueNames() );
        System.out.println();
      }

      // Data file information.
      System.out.println( "Training/Testing Data" );
      System.out.println( "------------------------------" );
      System.out.println();
      System.out.println( "Data will be extracted from " + dataFilePath );
      System.out.println();
    }

    return dataFilePath;
  }

  /**
   * Parses a decision tree data file.  Every sample read from
   * the file is converted to an integer array holding, for each
   * attribute, the position of the sample's value as reported
   * by that attribute, and is appended to the training set.
   * The count for the sample's target value (element 0) is
   * updated only once the whole sample has been validated.
   *
   * @param  dataInputStream An input stream attached
   *         to the data file.
   *
   * @throws InvalidDataFileException If a
   *         syntax error is encountered during the
   *         parsing process.
   *
   * @throws IOException If a problem occurs while
   *         reading the data file.
   */
  private void parseDataFile( InputStream dataInputStream )
    throws InvalidDataFileException,
           IOException
  {
    Vector rawSample;  // Vector of strings extracted
                       // directly from the data file.

    FileParser parser = new FileParser();

    // 1. Open the data file.
    parser.startDataParse( dataInputStream );

    // 2. Start extracting samples.
    while( (rawSample = parser.extractDataSample()) != null ) {
      // We have the raw samples, so now we verify
      // that all the values are legal and store the
      // sample in compact form (using the integer index of
      // each value).
      if( rawSample.size() != m_attributes.size() )
        throw new
          InvalidDataFileException( "Syntax error in data file (line " +
            parser.getCurrentLineNum() + "): Wrong " +
            "number of attributes on line." );

      int[] dataSample = new int[ m_attributes.size() ];

      try {
        // 2.a. Deal with all the attributes.
        for( int i = 0; i < rawSample.size(); i++ ) {
          // There should be a 1-to-1 ordering between
          // the internal attributes vector and the
          // raw sample vector.
          Attribute currAtt = (Attribute)m_attributes.elementAt( i );

          dataSample[i] = currAtt.getAttributeValuePosition(
                            (String)rawSample.elementAt( i ) );
        }
      }
      catch( NonexistentAttributeValueException e ) {
        // One of the attribute values on this line of
        // the data file doesn't correspond with the values
        // specified in the config file.
        throw new
          InvalidDataFileException( "Syntax error in data " +
            "file (line " + parser.getCurrentLineNum() +
            "): Attribute value does not match any values in " +
            "configuration file." );
      }

      // The whole line is valid, so it is now safe to count its
      // target value (element 0).  Counting only after validation
      // keeps the target sums in step with the samples actually
      // stored when a later value on the line is rejected.
      if( dataSample.length > 0 ) m_targetSums[ dataSample[0] ]++;

      // Add the last data sample to our current training set.
      m_trainingSet.add( dataSample );
    }

    //------------------------- DEBUG -------------------------
    if( DEBUG_ON ) {
      System.out.println( "Dataset::parseDataFile: " +
        "Finished parsing data file." );
      System.out.println();

      // Data sample information.
      System.out.println( "Data Samples" );
      System.out.println( "------------------------------" );
      System.out.println();
      System.out.println( "Parsed and stored " + m_trainingSet.size() +
                          " data samples." );
      System.out.println();
      System.out.print( "Target classification counts are: " );

      for( int i = 0; i < m_targetSums.length; i++ )
        System.out.print( m_targetSums[i] + "  " );

      System.out.println();
      System.out.println();

    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -