⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rdg1.java

📁 wekaUT 是 University of Texas at Austin 开发的、基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
   * Sets the array that defines which of the attributes   * are seen to be irrelevant.   *   * @param newAttList_Irr array that defines the irrelevant attributes.   */  public void setAttList_Irr(boolean [] newAttList_Irr) {    m_AttList_Irr = newAttList_Irr;  }  /**   * Initializes the format for the dataset produced.    *   * @return the output data format   * @exception Exception data format could not be defined    */  public Instances defineDataFormat() throws Exception {    Instances dataset;    Random random = new Random (getSeed());    setRandom(random);    m_DecisionList = new FastVector();    // number of examples is the same as given per option    setNumExamplesAct(getNumExamples());    // define dataset    dataset = defineDataset(random);    return dataset;   }  /**   * Generate an example of the dataset dataset.    * @return the instance generated   * @exception Exception if format not defined or generating <br>   * examples one by one is not possible, because voting is chosen   */  public Instance generateExample() throws Exception {    Random random = getRandom();    Instances format = getDatasetFormat();    if (format == null) throw new Exception("Dataset format not defined.");    if (getVoteFlag()) throw new Exception("Examples cannot be generated" +                                           " one by one.");    // generate values for all attributes    format = generateExamples(1, random, format);    return (format.lastInstance());  }  /**   * Generate all examples of the dataset.    
* @return the instance generated   * @exception Exception if format not defined or generating <br>   * examples one by one is not possible, because voting is chosen   */  public Instances generateExamples() throws Exception {    Random random = getRandom();    Instances format = getDatasetFormat();    if (format == null) throw new Exception("Dataset format not defined.");    // generate values for all attributes    format = generateExamples(getNumExamplesAct(), random, format);    // vote all examples, and set new class value    if (getVoteFlag())      format = voteDataset(format);    return (format);  }  /**   * Generate all examples of the dataset.    * @return the instance generated   * @exception Exception if format not defined or generating <br>   * examples one by one is not possible, because voting is chosen   */  public Instances generateExamples(int num,                                    Random random,                                   Instances format) throws Exception {    if (format == null) throw new Exception("Dataset format not defined.");        // generate values for all attributes    for (int i = 0; i < num; i++)  {      // over all examples to be produced      Instance example =  generateExample(random, format);      // set class of example using decision list      boolean classDefined = classifyExample(example);      if (!classDefined) {        // set class with newly generated rule        example = updateDecisionList(random, example);      }      example.setDataset(format);      format.add(example);    }    return (format);  } /**   * Generates a new rule for the decision list.   
* and classifies the new example   * @param random random number generator   * @param example example used to update decision list    */  private Instance updateDecisionList(Random random, Instance example)   throws Exception {    FastVector TestList;    Instances format = getDatasetFormat();    if (format == null) throw new Exception("Dataset format not defined.");    TestList = generateTestList(random, example);    int maxSize = getMaxRuleSize() < TestList.size() ?                             getMaxRuleSize() : TestList.size();    int ruleSize = ((int) (random.nextDouble() *                              (double) (maxSize - getMinRuleSize())))                                   + getMinRuleSize();    RuleList newRule = new RuleList();    for (int i=0; i < ruleSize; i++) {      int testIndex = (int) (random.nextDouble() * (double) TestList.size());      Test test = (Test) TestList.elementAt(testIndex);                newRule.addTest(test);      TestList.removeElementAt(testIndex);//      newRule.addTest((Test) TestList.elementAt(//                       (int) (random.nextDouble() * (double) ruleSize)));    }    double newClassValue = 0.0;    if (m_DecisionList.size() > 0) {      RuleList r = (RuleList)(m_DecisionList.lastElement());      double oldClassValue = (double)                         (r.getClassValue());      newClassValue = (double)((int)oldClassValue + 1)                               % getNumClasses();    }    newRule.setClassValue(newClassValue);    m_DecisionList.addElement(newRule);    example = new Instance(example);    example.setDataset(format);    example.setClassValue(newClassValue);    return example;  } /**   * Generates a new rule for the decision list   * and classifies the new example.   
*   * @param random random number generator   * @param example    */  private FastVector generateTestList(Random random, Instance example)    throws Exception {    Instances format = getDatasetFormat();    if (format == null) throw new Exception("Dataset format not defined.");    int numTests = getNumAttributes() - getNumIrrelevant();    FastVector TestList = new FastVector(numTests);    boolean [] irrelevant = getAttList_Irr();    for (int i = 0; i < getNumAttributes(); i++) {      if (!irrelevant[i]) {        Test newTest = null;        Attribute att = example.attribute(i);        if (att.isNumeric()) {          double newSplit = random.nextDouble();          boolean newNot = newSplit < example.value(i);          newTest = new Test(i, newSplit, format, newNot);        } else {          newTest = new Test(i, example.value(i), format, false);        }      TestList.addElement (newTest);           }    }  return TestList;  } /**   * Generates an example with its classvalue set to missing   * and binds it to the datasets.   *   * @param random random number generator   * @param dataset dataset the example gets bind to   */  private Instance generateExample(Random random, Instances format)     throws Exception {         double [] attributes;    Instance example;    attributes = new double[getNumAttributes() + 1];    for (int i = 0; i < getNumAttributes(); i++) {      double value = random.nextDouble();      if (format.attribute(i).isNumeric()) {        attributes[i] = value;       } else {	if (format.attribute(i).isNominal()) {	  attributes[i] = (value > 0.5)? 1.0 : 0.0;	} else {	  throw new Exception ("Attribute type is not supported.");	}      }    }    example = new Instance(0, attributes);    example.setDataset(format);    example.setClassMissing();    return example;   } /**   * Tries to classify an example.    
*    * @param example   */  private boolean classifyExample(Instance example) throws Exception {    double classValue = -1.0;      for (Enumeration e = m_DecisionList.elements();          e.hasMoreElements() && classValue < 0.0;) {      RuleList rl = (RuleList) e.nextElement();      classValue = rl.classifyInstance(example);       }    if (classValue >= 0.0) {      example.setClassValue(classValue);      return true;    } else return false;  } /**   * Classify example with maximum vote the following way.   * With every rule in the decisionlist, it is evaluated if   * the given instance could be the class of the rule.   * Finally the class value that receives the highest number of votes   * is assigned to the example.   *    * @param example example to be reclassified   * @return instance with new class value   */  private Instance votedReclassifyExample(Instance example) throws Exception {    boolean classDefined = false;     int classVotes [] = new int [getNumClasses()];     for (int i = 0; i < classVotes.length; i++) classVotes[i] = 0;     for (Enumeration e = m_DecisionList.elements();          e.hasMoreElements();) {      RuleList rl = (RuleList) e.nextElement();      int classValue = (int) rl.classifyInstance(example);      if (classValue >= 0) classVotes[classValue]++;      }    int maxVote = 0;    int vote = -1;    for (int i = 0; i < classVotes.length; i++) {      if (classVotes[i] > maxVote) {        maxVote = classVotes[i];        vote = i;       }    }    if (vote >= 0) {      example.setClassValue((double) vote);    } else      throw new Exception ("Error in instance classification.");  return example;  } /**   * Returns a dataset header.   
* @param random random number generator   * @return dataset header   */  private Instances defineDataset(Random random) throws Exception {    boolean [] attList_Irr;    int [] attList_Num;    FastVector attributes = new FastVector();    Attribute attribute;    FastVector nominalValues = new FastVector (2);    nominalValues.addElement("false");     nominalValues.addElement("true");     FastVector classValues = new FastVector (getNumClasses());    Instances dataset;         // set randomly those attributes that are irrelevant    attList_Irr = defineIrrelevant(random);    setAttList_Irr(attList_Irr);    // set randomly those attributes that are numeric    attList_Num = defineNumeric(random);     // define dataset    for (int i = 0; i < getNumAttributes(); i++) {      if (attList_Num[i] == Attribute.NUMERIC) {        attribute = new Attribute("a" + i);       }      else {               attribute = new Attribute("a" + i, nominalValues);       }      attributes.addElement(attribute);    }    int s = classValues.capacity();    for (int i = 0; i < classValues.capacity(); i++) {      classValues.addElement("c" + i);    }    attribute = new Attribute ("class", classValues);     attributes.addElement(attribute);    dataset = new Instances(getRelationName(), attributes,                            getNumExamplesAct());    dataset.setClassIndex(getNumAttributes());    // set dataset format of this class    Instances format = new Instances(dataset, 0);    setDatasetFormat(format);    return dataset;   }  /**   * Defines randomly the attributes as irrelevant.   * Number of attributes to be set as irrelevant is either set   * with a preceeding call of setNumIrrelevant() or is per default 0.   
*   * @param random   * @return list of boolean values with one value for each attribute,   * and each value set true or false according to if the corresponding   * attribute was defined irrelevant or not   */  private boolean [] defineIrrelevant(Random random) {    boolean [] irr = new boolean [getNumAttributes()];     // initialize    for (int i = 0; i < irr.length; i++) {      irr[i] = false;    }    // set randomly    int numIrr = 0;    for (int i = 0;          (numIrr < getNumIrrelevant()) && (i < getNumAttributes() * 5);          i++) {      int maybeNext = (int) (random.nextDouble() * (double) irr.length);      if (irr[maybeNext] == false) {        irr [maybeNext] = true;        numIrr++;      }    }    return irr;  } /**   * Chooses randomly the attributes that get datatyp numeric.   * @param random   * @return list of integer values, with one value for each attribute,   * and each value set to Attribut.NOMINAL or Attribut.NUMERIC   */  private int [] defineNumeric(Random random) {        int [] num = new int [getNumAttributes()];    // initialize    for (int i = 0; i < num.length; i++) {      num[i] = Attribute.NOMINAL;    }    int numNum = 0;    for (int i = 0;         (numNum < getNumNumeric()) && (i < getNumAttributes() * 5); i++) {      int maybeNext = (int) (random.nextDouble() * (double) num.length);      if (num[maybeNext] != Attribute.NUMERIC) {        num[maybeNext] = Attribute.NUMERIC;        numNum++;      }    }    return num;  }  /**   * Compiles documentation about the data generation. This is the number of   * irrelevant attributes and the decisionlist with all rules.   * Considering that the decisionlist might get enhanced until   * the last instance is generated, this method should be called at the   * end of the data generation process.    
*
   * Every line of the returned text starts with "%".
   *
   * @return string with additional information about generated dataset
   * @exception Exception declared by the signature; nothing in this
   * method throws it explicitly
   */
  public String generateFinished() throws Exception {
    boolean [] irrelevant = getAttList_Irr();
    Instances format = getDatasetFormat();

    // string for output at end of ARFF-File
    StringBuffer report = new StringBuffer();
    report.append("\n%\n% Number of attributes chosen as irrelevant = ")
          .append(getNumIrrelevant())
          .append("\n");

    // names of the attributes flagged as irrelevant
    for (int i = 0; i < irrelevant.length; i++) {
      if (irrelevant[i]) {
        report.append("% ").append(format.attribute(i).name()).append("\n");
      }
    }

    int numRules = m_DecisionList.size();
    report.append("%\n% DECISIONLIST (number of rules = ")
          .append(numRules)
          .append("):\n");

    for (int i = 0; i < numRules; i++) {
      RuleList rule = (RuleList) m_DecisionList.elementAt(i);
      report.append("% RULE ").append(i).append(": ")
            .append(rule.toString()).append("\n");
    }

    return report.toString();
  }

  /**
   * Resets the class values of all instances using voting.
   * For each instance the class value that satisfies the most rules
   * is chosen as new class value. The instances are relabelled in place,
   * so their order in the dataset is unchanged.
   *
   * @param dataset the instances to relabel
   * @return the changed instances
   * @exception Exception if an instance receives no vote from any rule
   */
  private Instances voteDataset(Instances dataset) throws Exception {
    // votedReclassifyExample sets the class value on the instance it is
    // given, so each stored instance can be updated where it is instead of
    // being re-added at the end and deleted from the front.
    int total = dataset.numInstances();
    for (int i = 0; i < total; i++) {
      votedReclassifyExample(dataset.instance(i));
    }
    return dataset;
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments for the data producer
   */
  public static void main(String [] argv) {
    try {
      Generator.makeData(new RDG1(), argv);
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -