// NOTE(review): removed web-scrape page header ("conjunctiverule.java" / font-size widget) — not part of the source.
* Set if NOT randomize the data before split to growing and
* pruning data. If NOT set, the seed of randomization is
* specified by the -S option. (Default: randomize) <p>
*
* -S <br>
* Seed of randomization. (Default: 1)<p>
*
* -E <br>
* Set whether consider the exclusive expressions for nominal
* attribute split. (Default: false) <p>
*
* -M number <br>
* Set the minimal weights of instances within a split.
* (Default: 2) <p>
*
* -P number <br>
* Set the number of antecedents allowed in the rule if pre-pruning
* is used. If this value is other than -1, then pre-pruning will be
* used, otherwise the rule uses REP. (Default: -1) <p>
*
* @return an enumeration of all the available options
*/
/**
 * Returns an enumeration describing the available options.
 *
 * @return an enumeration of all the available options
 */
public Enumeration listOptions() {
    Vector result = new Vector(6);

    result.addElement(new Option(
        "\tSet number of folds for REP\n\tOne fold is used as pruning set.\n\t(default 3)",
        "N", 1, "-N <number of folds>"));
    result.addElement(new Option(
        "\tSet if NOT uses randomization\n\t(default:use randomization)",
        "R", 0, "-R"));
    result.addElement(new Option(
        "\tSet whether consider the exclusive\n\texpressions for nominal attributes\n\t(default false)",
        "E", 0, "-E"));
    result.addElement(new Option(
        "\tSet the minimal weights of instances\n\twithin a split.\n\t(default 2.0)",
        "M", 1, "-M <min. weights>"));
    result.addElement(new Option(
        "\tSet number of antecedents for pre-pruning\n\tif -1, then REP is used\n\t(default -1)",
        "P", 1, "-P <number of antecedents>"));
    result.addElement(new Option(
        "\tSet the seed of randomization\n\t(default 1)",
        "S", 1, "-S <seed>"));

    return result.elements();
}
/**
 * Parses a given list of options. Each missing option falls back to its
 * documented default (-N 3, -M 2.0, -S 1, -P -1, -E off).
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
    String foldsStr = Utils.getOption('N', options);
    m_Folds = (foldsStr.length() > 0) ? Integer.parseInt(foldsStr) : 3;

    String minWeightStr = Utils.getOption('M', options);
    m_MinNo = (minWeightStr.length() > 0) ? Double.parseDouble(minWeightStr) : 2.0;

    String seedStr = Utils.getOption('S', options);
    m_Seed = (seedStr.length() > 0) ? Integer.parseInt(seedStr) : 1;

    String antdsStr = Utils.getOption('P', options);
    m_NumAntds = (antdsStr.length() > 0) ? Integer.parseInt(antdsStr) : -1;

    m_IsExclude = Utils.getFlag('E', options);
}
/**
 * Gets the current settings of the Classifier.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
    // 9 slots: four flag/value pairs plus the optional -E flag.
    String[] result = new String[9];
    int pos = 0;

    result[pos++] = "-N";
    result[pos++] = "" + m_Folds;
    result[pos++] = "-M";
    result[pos++] = "" + m_MinNo;
    result[pos++] = "-P";
    result[pos++] = "" + m_NumAntds;
    result[pos++] = "-S";
    result[pos++] = "" + m_Seed;
    if (m_IsExclude)
        result[pos++] = "-E";

    // Pad unused slots so the array contains no nulls.
    while (pos < result.length)
        result[pos++] = "";

    return result;
}
/** The access functions for parameters */
/**
 * Returns the tip text for the folds property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String foldsTipText() {
    return "Determines the amount of data used for pruning. One fold is used for "
        + "pruning, the rest for growing the rules.";
}
/**
 * Sets the number of folds used for reduced-error pruning.
 *
 * @param folds the number of folds
 */
public void setFolds(int folds) {
    m_Folds = folds;
}

/**
 * Gets the number of folds used for reduced-error pruning.
 *
 * @return the number of folds
 */
public int getFolds() {
    return m_Folds;
}
/**
 * Returns the tip text for the seed property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String seedTipText() {
    return "The seed used for randomizing the data.";
}
/**
 * Sets the seed used for randomizing the data.
 *
 * @param s the randomization seed
 */
public void setSeed(long s) {
    m_Seed = s;
}

/**
 * Gets the seed used for randomizing the data.
 *
 * @return the randomization seed
 */
public long getSeed() {
    return m_Seed;
}
/**
 * Returns the tip text for the exclusive property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String exclusiveTipText() {
    return "Set whether to consider exclusive expressions for nominal "
        + "attribute splits.";
}
/**
 * Gets whether exclusive expressions are considered for nominal
 * attribute splits.
 *
 * @return true if exclusive expressions are considered
 */
public boolean getExclusive() {
    return m_IsExclude;
}

/**
 * Sets whether exclusive expressions are considered for nominal
 * attribute splits.
 *
 * @param e true to consider exclusive expressions
 */
public void setExclusive(boolean e) {
    m_IsExclude = e;
}
/**
 * Returns the tip text for the minNo property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String minNoTipText() {
    return "The minimum total weight of the instances in a rule.";
}
/**
 * Sets the minimum total weight of the instances in a rule.
 *
 * @param m the minimum total weight
 */
public void setMinNo(double m) {
    m_MinNo = m;
}

/**
 * Gets the minimum total weight of the instances in a rule.
 *
 * @return the minimum total weight
 */
public double getMinNo() {
    return m_MinNo;
}
/**
 * Returns the tip text for the numAntds property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String numAntdsTipText() {
    return "Set the number of antecedents allowed in the rule if "
        + "pre-pruning is used. If this value is other than -1, then "
        + "pre-pruning will be used, otherwise the rule uses reduced-error "
        + "pruning.";
}
/**
 * Sets the number of antecedents allowed when pre-pruning is used
 * (-1 means reduced-error pruning is used instead).
 *
 * @param n the number of antecedents, or -1 for REP
 */
public void setNumAntds(int n) {
    m_NumAntds = n;
}

/**
 * Gets the number of antecedents allowed when pre-pruning is used.
 *
 * @return the number of antecedents, or -1 if REP is used
 */
public int getNumAntds() {
    return m_NumAntds;
}
/**
 * Builds a single rule learner with REP dealing with nominal classes or
 * numeric classes.
 * For nominal classes, this rule learner predicts a distribution on
 * the classes.
 * For numeric classes, this learner predicts a single value.
 *
 * @param instances the training data
 * @exception Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    if (instances.checkForStringAttributes())
        throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");

    // Work on a copy so the caller's dataset is never modified.
    Instances data = new Instances(instances);
    if (data.numInstances() == 0)
        throw new Exception("No training data!");

    data.deleteWithMissingClass();
    if (data.numInstances() == 0)
        // Fixed garbled message (was "Not training data without missing class values.")
        throw new Exception("No training data without missing class values.");
    if (data.numInstances() < m_Folds)
        throw new Exception("Not enough data for REP.");

    m_ClassAttribute = data.classAttribute();
    // One distribution slot per class value for nominal classes; a single
    // slot (the predicted mean) for numeric classes.
    if (m_ClassAttribute.isNominal())
        m_NumClasses = m_ClassAttribute.numValues();
    else
        m_NumClasses = 1;

    m_Antds = new FastVector();
    m_DefDstr = new double[m_NumClasses];
    m_Cnsqt = new double[m_NumClasses];
    m_Targets = new FastVector();
    m_Random = new Random(m_Seed);

    if (m_NumAntds != -1) {
        // Pre-pruning: grow the rule on all the data, no pruning set.
        grow(data);
    } else {
        // REP: randomize and stratify, then hold out one fold for pruning.
        data.randomize(m_Random);
        data.stratify(m_Folds);
        Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
        Instances pruneData = data.testCV(m_Folds, m_Folds - 1);
        grow(growData);   // Build this rule
        prune(pruneData); // Prune this rule
    }

    if (m_ClassAttribute.isNominal()) {
        Utils.normalize(m_Cnsqt);
        // Guard against an all-zero default distribution before normalizing.
        if (Utils.gr(Utils.sum(m_DefDstr), 0))
            Utils.normalize(m_DefDstr);
    }
}
/**
 * Computes the class distribution for the given instance: the rule's
 * consequent when the rule covers the instance, otherwise the default
 * distribution.
 *
 * @param instance the instance for which distribution is to be computed
 * @return the class distribution for the given instance
 * @exception Exception if the testing instance is null
 */
public double[] distributionForInstance(Instance instance) throws Exception {
    if (instance == null)
        throw new Exception("Testing instance is NULL!");
    return isCover(instance) ? m_Cnsqt : m_DefDstr;
}
/**
 * Whether the instance is covered by this rule, i.e. whether it
 * satisfies every antecedent.
 *
 * @param datum the instance in question
 * @return the boolean value indicating whether the instance is covered by this rule
 */
public boolean isCover(Instance datum) {
    // The rule covers the instance only if every antecedent does.
    for (int i = 0; i < m_Antds.size(); i++) {
        Antd antd = (Antd) m_Antds.elementAt(i);
        if (!antd.isCover(datum))
            return false;
    }
    return true;
}
/**
 * Whether this rule has antecedents, i.e. whether it is not a default rule.
 *
 * @return the boolean value indicating whether the rule has antecedents
 */
public boolean hasAntds() {
    return (m_Antds != null) && (m_Antds.size() > 0);
}
/**
* Build one rule using the growing data
*
* @param data the growing data used to build the rule
*/
private void grow(Instances data){
Instances growData = new Instances(data);
double defInfo;
double whole = data.sumOfWeights();
if(m_NumAntds != 0){
/* Class distribution for data both covered and not covered by one antecedent */
double[][] classDstr = new double[2][m_NumClasses];
/* Compute the default information of the growing data */
for(int j=0; j < m_NumClasses; j++){
classDstr[0][j] = 0;
classDstr[1][j] = 0;
}
if(m_ClassAttribute.isNominal()){
for(int i=0; i < growData.numInstances(); i++){
Instance datum = growData.instance(i);
classDstr[0][(int)datum.classValue()] += datum.weight();
}
defInfo = ContingencyTables.entropy(classDstr[0]);
}
else{
for(int i=0; i < growData.numInstances(); i++){
Instance datum = growData.instance(i);
classDstr[0][0] += datum.weight() * datum.classValue();
}
// No need to be divided by the denomitor because
// it's always the same
double defMean = (classDstr[0][0] / whole);
defInfo = meanSquaredError(growData, defMean) * growData.sumOfWeights();
}
// Store the default class distribution
double[][] tmp = new double[2][m_NumClasses];
for(int y=0; y < m_NumClasses; y++){
if(m_ClassAttribute.isNominal()){
tmp[0][y] = classDstr[0][y];
tmp[1][y] = classDstr[1][y];
}
else{
tmp[0][y] = classDstr[0][y]/whole;
tmp[1][y] = classDstr[1][y];
}
}
m_Targets.addElement(tmp);
/* Keep the record of which attributes have already been used*/
boolean[] used=new boolean[growData.numAttributes()];
for (int k=0; k<used.length; k++)
used[k]=false;
int numUnused=used.length;
double maxInfoGain, uncoveredWtSq=0, uncoveredWtVl=0, uncoveredWts=0;
boolean isContinue = true; // The stopping criterion of this rule
while (isContinue){
maxInfoGain = 0; // We require that infoGain be positive
/* Build a list of antecedents */
Antd oneAntd=null;
Instances coverData = null, uncoverData = null;
Enumeration enumAttr=growData.enumerateAttributes();
int index=-1;
/* Build one condition based on all attributes not used yet*/
while (enumAttr.hasMoreElements()){
Attribute att= (Attribute)(enumAttr.nextElement());
index++;
Antd antd =null;
if(m_ClassAttribute.isNominal()){
if(att.isNumeric())
antd = new NumericAntd(att, classDstr[1]);
else
antd = new NominalAntd(att, classDstr[1]);
}
// NOTE(review): source truncated here — the remainder of grow() and the rest of the
// class are missing. Removed web-scrape footer (keyboard-shortcut help text); recover
// the full file from the original Weka distribution (weka.classifiers.rules.ConjunctiveRule).