⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rdg1.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    RDG1.java
 *    Copyright (C) 2000 Gabi Schmidberger.
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package weka.datagenerators;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;

/** 
 * Class to generate data randomly by producing a decision list.
 * The decision list consists of rules.
 * Instances are generated randomly one by one. If decision list fails
 * to classify the current instance, a new rule according to this current
 * instance is generated and added to the decision list.<p>
 *
 * The option -V switches on voting, which means that at the end
 * of the generation all instances are
 * reclassified to the class value that is supported by the most rules.<p>
 *
 * This data generator can generate 'boolean' attributes (= nominal with
 * the values {true, false}) and numeric attributes. The rules can be
 * 'A' or 'NOT A' for boolean values and 'B &lt; random_value' or
 * 'B &gt;= random_value' for numeric values.<p> 
 *
 * Valid options are:<p>
 *
 * -R num <br>
 * The maximum number of attributes chosen to form a rule (default 10).<p>
 *
 * -M num <br>
 * The minimum number of attributes chosen to form a rule (default 1).<p>
 *
 * -I num <br>
 * The number of irrelevant attributes (default 0).<p>
 *
 * -N num <br>
 * The number of numeric attributes (default 0).<p>
 *
 * -S seed <br>
 * Random number seed for random function used (default 1). <p>
 *
 * -V <br>
 * Flag to use voting. <p>
 *
 * The following is an example of a generated dataset: <br>
 *
 * %<br>
 * % weka.datagenerators.RDG1 -r expl -a 2 -c 3 -n 4 -N 1 -I 0 -M 2 -R 10 -S 2<br>
 * %<br>
 * relation expl<br>
 *<br>
 * attribute a0 {false,true}<br>
 * attribute a1 numeric<br>
 * attribute class {c0,c1,c2}<br>
 *<br>
 * data<br>
 *<br>
 * true,0.496823,c0<br>
 * false,0.743158,c1<br>
 * false,0.408285,c1<br>
 * false,0.993687,c2<br>
 * %<br>
 * % Number of attributes chosen as irrelevant = 0<br>
 * %<br>
 * % DECISIONLIST (number of rules = 3):<br>
 * % RULE 0:   c0 := a1 < 0.986, a0<br>
 * % RULE 1:   c1 := a1 < 0.95, not(a0)<br>
 * % RULE 2:   c2 := not(a0), a1 >= 0.562<br>
 *<p>
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @version $Revision$ 
 **/
public class RDG1 extends Generator implements OptionHandler,
			                       Serializable {

  /**
   * A single rule of the decision list: a sequence of tests plus the
   * class value that is returned for an instance passing all of them.
   */
  private class RuleList implements Serializable {

    /**@serial the tests of this rule; remains null until the first
     * test has been added via addTest */
    private FastVector m_RuleList = null;
    
    /**@serial class value returned when all tests are passed */
    double m_ClassValue = 0.0;

    /**
     * Gets the class value of this rule.
     *
     * @return the class value of this rule
     */
    public double getClassValue() {
      return m_ClassValue;
    }
    
    /**
     * Sets the class value of this rule.
     *
     * @param newClassValue the new class value
     */
    public void setClassValue(double newClassValue) {
      this.m_ClassValue = newClassValue;
    }
    
    /**
     * Appends a test to this rule, creating the test list on first use.
     *
     * @param newTest the test to append
     */
    private void addTest (Test newTest) { 
      FastVector tests = m_RuleList;
      if (tests == null) {
        tests = new FastVector();
        m_RuleList = tests;
      }
      tests.addElement(newTest);
    }
    
    /**
     * Runs the tests of this rule, in the order they were added,
     * against the given instance and stops at the first failing test.
     *
     * @param example the instance to check
     * @return the class value of this rule if no test failed,
     * -1.0 otherwise
     * @exception Exception if a test throws one
     */
    private double classifyInstance (Instance example) throws Exception {
      Enumeration tests = m_RuleList.elements();
      while (tests.hasMoreElements()) {
        Test current = (Test) tests.nextElement();
        if (!current.passesTest(example)) {
          return -1.0;
        }
      }
      return m_ClassValue;
    }
    
    /**
     * Builds the textual form of this rule: the class label
     * ("c" followed by the class value cast to int), " := ", and the
     * prolog strings of all tests separated by ", ".
     *
     * @return the rule as a string
     */
    public String toString () {
      StringBuffer text = new StringBuffer();
      text.append("  c").append((int) m_ClassValue).append(" := ");
      String separator = "";
      for (Enumeration tests = m_RuleList.elements();
           tests.hasMoreElements(); ) {
        Test current = (Test) tests.nextElement();
        text.append(separator).append(current.toPrologString());
        // every test after the first one is preceded by a comma
        separator = ", ";
      }
      return text.toString();
    } 
    
  } /*end class RuleList ******/

  /**@serial maximum number of tests in a rule (option -R, default 10) */
  private int m_MaxRuleSize = 10;
  
  /**@serial minimum number of tests in a rule (option -M, default 1) */
  private int m_MinRuleSize = 1;
  
  /**@serial number of irrelevant attributes (option -I, default 0) */
  private int m_NumIrrelevant = 0;

  /**@serial number of numeric attributes (option -N, default 0) */
  private int m_NumNumeric = 0;

  /**@serial random number generator seed (option -S, default 1) */
  private int m_Seed = 1;
 
  /**@serial flag that stores if voting is wished (option -V) */
  private boolean m_VoteFlag = false;

  /**@serial dataset format */
  private Instances m_DatasetFormat = null;

  /**@serial random number generator; getRandom() creates it from the
   * seed on first use if it has not been set */
  private Random m_Random = null;

   /**@serial decision list (presumably a list of RuleList objects -
    * TODO confirm against the code that fills it) */
  private FastVector m_DecisionList = null;

  /**@serial array that defines which attributes are irrelevant, with:
   * true = attribute is irrelevant; false = attribute is not irrelevant */
  boolean [] m_AttList_Irr;

  /**@serial debug flag (stored as an int, initially 0) */
  private int m_Debug = 0;

  /**
   * Returns a string describing this data generator.
   *
   * @return a description of the data generator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    
    // each fragment but the last ends with a blank so that the
    // concatenated sentence keeps its word boundaries
    return "A data generator that produces data randomly "
           + "with \'boolean\' (nominal with values {false,true}) and "
           + "numeric attributes by producing a decisionlist.";
  }

 /**
   * Returns an enumeration describing the available options.
   * The options -R, -M, -I, -N and -S take one argument each;
   * -V is a flag without argument.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {

    // capacity matches the six options added below
    Vector newVector = new Vector(6);

    newVector.addElement(new Option(
              "\tmaximum size for rules (default 10) ",
              "R", 1, "-R <num>"));
    newVector.addElement(new Option(
              "\tminimum size for rules (default 1) ",
              "M", 1, "-M <num>"));
    newVector.addElement(new Option(
              "\tnumber of irrelevant attributes (default 0)",
              "I", 1, "-I <num>"));
    newVector.addElement(new Option(
              "\tnumber of numeric attributes (default 0)",
              "N", 1, "-N <num>"));
    newVector.addElement(new Option(
              "\tseed for random function (default 1)",
              "S", 1, "-S <seed>"));
    // -V is parsed with Utils.getFlag in setOptions, so it takes no argument
    newVector.addElement(new Option(
              "\tswitch on voting (default is no voting)",
              "V", 0, "-V"));
    return newVector.elements();
  }

  /**
   * Parses a list of options for this object. <p>
   *
   * For list of valid options see class description. An option that is
   * not given is reset to its default (-R 10, -M 1, -I 0, -N 0, -S 1,
   * no voting). The options are processed in the order
   * -R, -M, -I, -N, -S, -V; the rule size check happens right after
   * -I has been processed.<p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported or if the number
   * of attributes minus the number of irrelevant attributes is smaller
   * than the minimum rule size
   */
  public void setOptions(String[] options) throws Exception {

    String maxSizeArg = Utils.getOption('R', options);
    if (maxSizeArg.length() == 0) {
      setMaxRuleSize(10);
    } else {
      setMaxRuleSize((int) Double.parseDouble(maxSizeArg));
    }

    String minSizeArg = Utils.getOption('M', options);
    if (minSizeArg.length() == 0) {
      setMinRuleSize(1);
    } else {
      setMinRuleSize((int) Double.parseDouble(minSizeArg));
    }

    String irrelevantArg = Utils.getOption('I', options);
    if (irrelevantArg.length() == 0) {
      setNumIrrelevant(0);
    } else {
      setNumIrrelevant((int) Double.parseDouble(irrelevantArg));
    }

    // attributes left over once the irrelevant ones are taken away
    int usableAttributes = getNumAttributes() - getNumIrrelevant();
    if (usableAttributes < getMinRuleSize()) {
      throw new Exception("Possible rule size is below minimal rule size.");
    }

    String numericArg = Utils.getOption('N', options);
    if (numericArg.length() == 0) {
      setNumNumeric(0);
    } else {
      setNumNumeric((int) Double.parseDouble(numericArg));
    }

    String seedArg = Utils.getOption('S', options);
    if (seedArg.length() == 0) {
      setSeed(1);
    } else {
      setSeed(Integer.parseInt(seedArg));
    }

    setVoteFlag(Utils.getFlag('V', options));
  }

  /**
   * Gets the current settings of the datagenerator RDG1.
   * The returned array always has 12 entries; entries not needed for
   * an option are empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] result = new String [12];
    int pos = 0;

    result[pos++] = "-N";
    result[pos++] = String.valueOf(getNumNumeric());
    result[pos++] = "-I";
    result[pos++] = String.valueOf(getNumIrrelevant());
    result[pos++] = "-M";
    result[pos++] = String.valueOf(getMinRuleSize());
    result[pos++] = "-R";
    result[pos++] = String.valueOf(getMaxRuleSize());
    result[pos++] = "-S";
    result[pos++] = String.valueOf(getSeed());

    if (getVoteFlag()) {
      result[pos++] = "-V"; 
      result[pos++] = "";
    }

    // pad the unused tail with empty strings
    for (; pos < result.length; pos++) {
      result[pos] = "";
    }
    return result;
  }
    
  /**
   * Gets the random generator. If none has been set so far, a new one
   * is created from the current seed and kept for later calls.
   *
   * @return the random generator
   */
  public Random getRandom() {
    if (m_Random != null) {
      return m_Random;
    }
    m_Random = new Random (getSeed());
    return m_Random;
  }
  
  /**
   * Replaces the random generator used by this data generator.
   *
   * @param newRandom the random generator to use from now on
   */
  public void setRandom(Random newRandom) {
    this.m_Random = newRandom;
  }

  /**
   * Returns the upper limit for the number of tests in a rule.
   *
   * @return the maximum number of tests allowed in rules
   */
  public int getMaxRuleSize() {
    return this.m_MaxRuleSize;
  }
  
  /**
   * Stores the upper limit for the number of tests in a rule.
   *
   * @param newMaxRuleSize new maximum number of tests allowed in rules.
   */
  public void setMaxRuleSize(int newMaxRuleSize) {
    this.m_MaxRuleSize = newMaxRuleSize;
  }

  /**
   * Returns the lower limit for the number of tests in a rule.
   *
   * @return the minimum number of tests allowed in rules
   */
  public int getMinRuleSize() {
    return this.m_MinRuleSize;
  }
  
  /**
   * Stores the lower limit for the number of tests in a rule.
   *
   * @param newMinRuleSize new minimum number of tests in rules.
   */
  public void setMinRuleSize(int newMinRuleSize) {
    this.m_MinRuleSize = newMinRuleSize;
  }

  /**
   * Returns how many attributes are to be irrelevant.
   *
   * @return the number of irrelevant attributes
   */
  public int getNumIrrelevant() {
    return this.m_NumIrrelevant;
  }
  
  /**
   * Stores how many attributes are to be irrelevant.
   *
   * @param newNumIrrelevant the number of irrelevant attributes.
   */
  public void setNumIrrelevant(int newNumIrrelevant) {
    this.m_NumIrrelevant = newNumIrrelevant;
  }

  /**
   * Returns how many attributes are to be numeric.
   *
   * @return the number of numerical attributes.
   */
  public int getNumNumeric() {
    return this.m_NumNumeric;
  }
  
  /**
   * Stores how many attributes are to be numeric.
   *
   * @param newNumNumeric the number of numerical attributes.
   */
  public void setNumNumeric(int newNumNumeric) { 
    this.m_NumNumeric = newNumNumeric;
  }

  /**
   * Tells whether voting is switched on.
   *
   * @return voting flag.
   */
  public boolean getVoteFlag() {
    return this.m_VoteFlag;
  }
  
  /**
   * Switches voting on or off.
   *
   * @param newVoteFlag boolean with the new setting of the vote flag.
   */
  public void setVoteFlag(boolean newVoteFlag) {
    this.m_VoteFlag = newVoteFlag;
  }

  /**
   * Gets the single mode flag, which is the negation of the vote flag.
   *
   * @return true if methode generateExample can be used.
   */
  public boolean getSingleModeFlag() {
    return !getVoteFlag();
  }
  
  /**
   * Returns the seed stored for the random number generator.
   *
   * @return the random number seed.
   */
  public int getSeed() {
    return this.m_Seed;
  }
  
  /**
   * Stores a new seed. Only the stored seed value is changed here;
   * the random generator object itself is left untouched.
   *
   * @param newSeed the new random number seed.
   */
  public void setSeed(int newSeed) {
    this.m_Seed = newSeed;
  }

  /**
   * Returns the stored dataset format.
   *
   * @return the dataset format.
   */
  public Instances getDatasetFormat() {
    return this.m_DatasetFormat;
  }
  
  /**
   * Stores the dataset format.
   *
   * @param newDatasetFormat the new dataset format.
   */
  public void setDatasetFormat(Instances newDatasetFormat) { 
    this.m_DatasetFormat = newDatasetFormat;
  }  

  /**
   * Returns the array that marks the irrelevant attributes
   * (true = attribute is irrelevant). The internal array itself is
   * returned, not a copy.
   *
   * @return the array that defines the irrelevant attributes
   */
  public boolean [] getAttList_Irr() {
    return this.m_AttList_Irr;
  }
  
  /**
   * Sets the array that defines which of the attributes

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -