⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rdg1.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
   * are seen to be irrelevant.
   *
   * @param newAttList_Irr array that defines the irrelevant attributes.
   */
  public void setAttList_Irr(boolean [] newAttList_Irr) {
    // The passed array itself is stored; no copy is made.
    this.m_AttList_Irr = newAttList_Irr;
  }

  /**
   * Initializes the format for the dataset produced. 
   *
   * @return the output data format
   * @exception Exception data format could not be defined 
   */

  public Instances defineDataFormat() throws Exception {

    // fresh random number generator based on the configured seed
    Random rng = new Random(getSeed());
    setRandom(rng);

    // every format definition starts with an empty decision list
    m_DecisionList = new FastVector();

    // actual number of examples is the number given per option
    setNumExamplesAct(getNumExamples());

    // build and hand back the dataset header
    return defineDataset(rng);
  }

  /**
   * Generate an example of the dataset.
   * @return the instance generated
   * @exception Exception if format not defined or generating <br>
   * examples one by one is not possible, because voting is chosen
   */

  public Instance generateExample() throws Exception {

    Random rng = getRandom();
    Instances header = getDatasetFormat();

    // preconditions: a format must exist and voting must be switched off
    if (header == null) {
      throw new Exception("Dataset format not defined.");
    }
    if (getVoteFlag()) {
      throw new Exception("Examples cannot be generated one by one.");
    }

    // one example is appended to the format; that example is the result
    Instances extended = generateExamples(1, rng, header);
    return extended.lastInstance();
  }

  /**
   * Generate all examples of the dataset. If voting is chosen, the
   * class values of all examples are reset by voting afterwards.
   * @return the instances generated
   * @exception Exception if format not defined or an example
   * cannot be classified by voting
   */

  public Instances generateExamples() throws Exception {

    Random rng = getRandom();
    Instances data = getDatasetFormat();
    if (data == null) {
      throw new Exception("Dataset format not defined.");
    }

    // fill the dataset with the configured number of examples
    data = generateExamples(getNumExamplesAct(), rng, data);

    // without voting the generated data is already final
    if (!getVoteFlag()) {
      return data;
    }

    // vote all examples, and set new class value
    return voteDataset(data);
  }

  /**
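   * Generate the given number of examples and add them to the dataset.
   * @param num number of examples to generate
   * @param random random number generator
   * @param format dataset the generated examples are added to
   * @return the dataset with the generated instances added
   * @exception Exception if format not defined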
   */

  public Instances generateExamples(int num, 
                                   Random random,
                                   Instances format) throws Exception {

    if (format == null) {
      throw new Exception("Dataset format not defined.");
    }

    int produced = 0;
    while (produced < num) {
      // draw attribute values for the next example
      Instance candidate = generateExample(random, format);

      // the decision list gets the first try; if no rule fires, a new
      // rule is generated that defines the class of this example
      if (!classifyExample(candidate)) {
        candidate = updateDecisionList(random, candidate);
      }

      candidate.setDataset(format);
      format.add(candidate);
      produced++;
    }

    return format;
  }

 /**
   * Generates a new rule for the decision list
   * and assigns the class value of that rule to the new example.
   * @param random random number generator
   * @param example example used to update decision list 
   * @return the example with the class value of the new rule set
   */
  private Instance updateDecisionList(Random random, Instance example)
   throws Exception {

    // Builds a rule out of randomly picked tests derived from the example,
    // appends it to the decision list and returns the example carrying the
    // class value of that rule.

    Instances format = getDatasetFormat();
    if (format == null) throw new Exception("Dataset format not defined.");

    // candidate tests the new rule is assembled from
    FastVector TestList = generateTestList(random, example);

    int maxSize = getMaxRuleSize() < TestList.size() ? 
                            getMaxRuleSize() : TestList.size();
    int ruleSize = ((int) (random.nextDouble() * 
                             (double) (maxSize - getMinRuleSize())))
                                   + getMinRuleSize();

    // Tests are drawn without replacement below, so a rule can never hold
    // more tests than there are candidates. When the minimum rule size is
    // larger than the number of candidates the computed size overshoots;
    // cap it instead of reading from an exhausted list.
    if (ruleSize > TestList.size()) {
      ruleSize = TestList.size();
    }

    RuleList newRule = new RuleList();
    for (int i = 0; i < ruleSize; i++) {
      // pick a random remaining test and remove it from the candidates
      int testIndex = (int) (random.nextDouble() * (double) TestList.size());
      Test test = (Test) TestList.elementAt(testIndex);

      newRule.addTest(test);
      TestList.removeElementAt(testIndex);
    }

    // the first rule gets class 0; every further rule gets the class that
    // follows the class of the last rule, wrapping around at the number
    // of classes
    double newClassValue = 0.0;
    if (m_DecisionList.size() > 0) {
      RuleList lastRule = (RuleList) (m_DecisionList.lastElement());
      double oldClassValue = (double) (lastRule.getClassValue());
      newClassValue = (double) ((int) oldClassValue + 1)
                               % getNumClasses();
    }
    newRule.setClassValue(newClassValue);
    m_DecisionList.addElement(newRule);

    // hand back a new instance built from the example, bound to the
    // format and labelled with the class of the new rule
    example = new Instance(example);
    example.setDataset(format);
    example.setClassValue(newClassValue);
    return example;
  }

 /**
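   * Generates the list of tests a new rule can be built from.
   * One test is generated for every attribute that is not irrelevant,
   * using the value the given example has for that attribute.
   *
   * @param random random number generator
   * @param example example the tests are generated from
   * @return list of the generated tests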
   */
  private FastVector generateTestList(Random random, Instance example) 
   throws Exception {

    Instances header = getDatasetFormat();
    if (header == null) {
      throw new Exception("Dataset format not defined.");
    }

    // initial capacity: one test per attribute that is not irrelevant
    FastVector tests = new FastVector(getNumAttributes() - getNumIrrelevant());
    boolean [] skip = getAttList_Irr();

    for (int attIndex = 0; attIndex < getNumAttributes(); attIndex++) {
      // irrelevant attributes never appear in a test
      if (skip[attIndex]) {
        continue;
      }

      Test test;
      if (example.attribute(attIndex).isNumeric()) {
        // random split point; the flag records whether the split lies
        // below the value of the example
        double split = random.nextDouble();
        boolean splitBelowValue = split < example.value(attIndex);
        test = new Test(attIndex, split, header, splitBelowValue);
      } else {
        // non-numeric: test on the value of the example itself
        test = new Test(attIndex, example.value(attIndex), header, false);
      }
      tests.addElement(test);
    }
    return tests;
  }

 /**
   * Generates an example with its classvalue set to missing
   * and binds it to the datasets.
   *
   * @param random random number generator
   * @param format dataset the example gets bound to
   * @return the generated example
   */
  private Instance generateExample(Random random, Instances format) 
    throws Exception {     

    // one slot per attribute plus one more slot at the end
    double [] values = new double[getNumAttributes() + 1];

    for (int attIndex = 0; attIndex < getNumAttributes(); attIndex++) {
      // the random number is drawn before the attribute type is inspected
      double draw = random.nextDouble();
      Attribute att = format.attribute(attIndex);

      if (att.isNumeric()) {
        // numeric attributes take the random number as it is
        values[attIndex] = draw;
      } else if (att.isNominal()) {
        // nominal attributes get index 1 or 0 depending on the draw
        values[attIndex] = (draw > 0.5) ? 1.0 : 0.0;
      } else {
        throw new Exception ("Attribute type is not supported.");
      }
    }

    // NOTE(review): the first constructor argument is presumably the
    // instance weight; confirm that a value of 0 is intended here.
    Instance result = new Instance(0, values);
    result.setDataset(format);
    result.setClassMissing();
    return result;
  }

 /**
   * Tries to classify an example. 
   * 
   * @param example
   */
  private boolean classifyExample(Instance example) throws Exception {
    // a negative value means that no rule has classified the example yet
    double label = -1.0;

    // walk through the rules in order until one delivers a class value
    Enumeration rules = m_DecisionList.elements();
    while (rules.hasMoreElements() && label < 0.0) {
      RuleList rule = (RuleList) rules.nextElement();
      label = rule.classifyInstance(example);
    }

    if (!(label >= 0.0)) {
      return false;
    }
    example.setClassValue(label);
    return true;
  }

 /**
   * Classify example with maximum vote the following way.
   * With every rule in the decisionlist, it is evaluated if
   * the given instance could be the class of the rule.
   * Finally the class value that receives the highest number of votes
   * is assigned to the example.
   * 
   * @param example example to be reclassified
   * @return instance with new class value
   */
  private Instance votedReclassifyExample(Instance example) throws Exception {

    // one counter per class; a new int array starts with all zeros
    int [] tally = new int [getNumClasses()];

    // every rule that delivers a class value votes for that class
    Enumeration rules = m_DecisionList.elements();
    while (rules.hasMoreElements()) {
      RuleList rule = (RuleList) rules.nextElement();
      int predicted = (int) rule.classifyInstance(example);
      if (predicted >= 0) {
        tally[predicted]++;
      }
    }

    // the first class with the strictly highest count wins; a class
    // needs at least one vote to be chosen
    int winner = -1;
    int best = 0;
    for (int c = 0; c < tally.length; c++) {
      if (tally[c] > best) {
        best = tally[c];
        winner = c;
      }
    }

    if (winner < 0) {
      throw new Exception ("Error in instance classification.");
    }
    example.setClassValue((double) winner);
    return example;
  }

 /**
   * Returns a dataset header.
   * @param random random number generator
   * @return dataset header
   */
  private Instances defineDataset(Random random) throws Exception {

    // Builds the header: attributes a0..a(n-1), each numeric or nominal
    // with the values "false"/"true", followed by a nominal class
    // attribute. Also stores the irrelevant-attribute list and an empty
    // copy of the header as dataset format of this generator.

    // the two values shared by all nominal attributes
    FastVector nominalValues = new FastVector (2);
    nominalValues.addElement("false"); 
    nominalValues.addElement("true"); 

    // set randomly those attributes that are irrelevant
    boolean [] attList_Irr = defineIrrelevant(random);
    setAttList_Irr(attList_Irr);

    // set randomly those attributes that are numeric
    int [] attList_Num = defineNumeric(random); 

    // define the attributes
    FastVector attributes = new FastVector();
    for (int i = 0; i < getNumAttributes(); i++) {
      Attribute attribute;
      if (attList_Num[i] == Attribute.NUMERIC) {
        attribute = new Attribute("a" + i); 
      } else {
        attribute = new Attribute("a" + i, nominalValues); 
      }
      attributes.addElement(attribute);
    }

    // class labels c0..c(k-1); the bound is the configured number of
    // classes itself instead of the storage capacity of the vector
    int numClasses = getNumClasses();
    FastVector classValues = new FastVector (numClasses);
    for (int i = 0; i < numClasses; i++) {
      classValues.addElement("c" + i);
    }
    attributes.addElement(new Attribute ("class", classValues));

    // the class attribute was added last, so its index equals the
    // number of the other attributes
    Instances dataset = new Instances(getRelationName(), attributes,
                                      getNumExamplesAct());
    dataset.setClassIndex(getNumAttributes());

    // set dataset format of this class
    Instances format = new Instances(dataset, 0);
    setDatasetFormat(format);
    return dataset; 
  } 

 /**
   * Defines randomly the attributes as irrelevant.
   * Number of attributes to be set as irrelevant is either set
   * with a preceding call of setNumIrrelevant() or is 0 by default.
   *
   * @param random
   * @return list of boolean values with one value for each attribute,
   * and each value set true or false according to if the corresponding
   * attribute was defined irrelevant or not
   */
  private boolean [] defineIrrelevant(Random random) {

    // a new boolean array starts with every entry false
    boolean [] flags = new boolean [getNumAttributes()];

    // Pick random positions until enough attributes are marked. The number
    // of attempts is limited to five times the number of attributes, so
    // fewer attributes than requested may end up marked.
    int marked = 0;
    int attempt = 0;
    while ((marked < getNumIrrelevant()) 
           && (attempt < getNumAttributes() * 5)) {
      int pick = (int) (random.nextDouble() * (double) flags.length);
      if (!flags[pick]) {
        flags[pick] = true;
        marked++;
      }
      attempt++;
    }
    return flags;
  }

 /**
   * Chooses randomly the attributes that get datatype numeric.
   * @param random random number generator
   * @return list of integer values, with one value for each attribute,
   * and each value set to Attribute.NOMINAL or Attribute.NUMERIC
   */
  private int [] defineNumeric(Random random) {

    // every attribute starts out as nominal
    int [] types = new int [getNumAttributes()];
    for (int k = 0; k < types.length; k++) {
      types[k] = Attribute.NOMINAL;
    }

    // Switch random positions to numeric until enough are switched. The
    // number of attempts is limited to five times the number of
    // attributes, so fewer attributes than requested may become numeric.
    int switched = 0;
    int attempt = 0;
    while ((switched < getNumNumeric()) 
           && (attempt < getNumAttributes() * 5)) {
      int pick = (int) (random.nextDouble() * (double) types.length);
      if (types[pick] != Attribute.NUMERIC) {
        types[pick] = Attribute.NUMERIC;
        switched++;
      }
      attempt++;
    }
    return types;
  }

  /**
   * Compiles documentation about the data generation. This is the number of
   * irrelevant attributes and the decisionlist with all rules.
   * Considering that the decisionlist might get enhanced until
   * the last instance is generated, this method should be called at the
   * end of the data generation process. 
   *
   * @return string with additional information about generated dataset
   * @exception Exception no input structure has been defined
   */
  public String generateFinished() throws Exception {

    // Without a dataset format there is nothing to report on; fail with
    // the same message the generating methods use instead of running
    // into a NullPointerException further down.
    Instances format = getDatasetFormat();
    if (format == null) throw new Exception("Dataset format not defined.");

    StringBuffer dLString = new StringBuffer();

    // string for output at end of ARFF-File; every line starts with '%'
    boolean [] attList_Irr = getAttList_Irr();
    dLString.append("\n%\n% Number of attributes chosen as irrelevant = ")
            .append(getNumIrrelevant())
            .append("\n");

    // names of the attributes that are marked as irrelevant
    for (int i = 0; i < attList_Irr.length; i++) {
      if (attList_Irr[i]) {
        dLString.append("% ")
                .append(format.attribute(i).name())
                .append("\n");
      }
    }

    dLString.append("%\n% DECISIONLIST (number of rules = ")
            .append(m_DecisionList.size())
            .append("):\n");

    // one line per rule, in the order of the decision list
    for (int i = 0; i < m_DecisionList.size(); i++) {
      RuleList rl = (RuleList) m_DecisionList.elementAt(i);
      dLString.append("% RULE ")
              .append(i)
              .append(": ")
              .append(rl.toString())
              .append("\n");
    }

    return dLString.toString();
  }

 /**
   * Resets the class values of all instances using voting.
   * For each instance the class value that satisfies the most rules
   * is chosen as new class value.
   *
   * @param dataset
   * @return the changed instances
   */
  private Instances voteDataset(Instances dataset) throws Exception {

    // Each pass takes the instance at the front, re-votes its class,
    // appends the result at the end and removes the front entry. The size
    // is the same after every pass, so one pass per instance handles
    // every instance once.
    int pending = dataset.numInstances();
    while (pending > 0) {
      Instance revoted = votedReclassifyExample(dataset.firstInstance());
      dataset.add(revoted);
      dataset.delete(0);
      pending--;
    }
    return dataset;
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments for the data producer: 
   */
  public static void main(String [] argv) {

    try {
      // hand a fresh generator and the command line over to the
      // generic data production routine
      RDG1 generator = new RDG1();
      Generator.makeData(generator, argv);
    } catch (Exception failure) {
      // only the message is reported
      System.out.println(failure.getMessage());
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -