📄 conjunctiverule.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    private boolean isIn;	    /* Constructor for nominal class */    public NominalAntd(Attribute a, double[] unc){       super(a, unc);      int bag = att.numValues();      stats = new double[bag][m_NumClasses];      coverage = new double[bag];      isIn = true;    }   	    /* Constructor for numeric class */    public NominalAntd(Attribute a, double sq, double vl, double wts){       super(a, sq, vl, wts);      int bag = att.numValues();	          stats = null;      coverage = new double[bag];      isIn = true;    }	    /**     * Implements the splitData function.       * This procedure is to split the data into bags according      * to the nominal attribute value     * the data with missing values are stored in the last bag.     * The infoGain for each bag is also calculated.       *      * @param data the data to be split     * @param defInfo the default information for data     * @return the array of data after split     */    public Instances[] splitData(Instances data, double defInfo){      int bag = att.numValues();      Instances[] splitData = new Instances[bag+1];      double[] wSq = new double[bag];      double[] wVl = new double[bag];      double totalWS=0, totalWV=0, msingWS=0, msingWV=0, sum=data.sumOfWeights();      double[] all = new double[m_NumClasses];      double[] missing = new double[m_NumClasses];	   	          for(int w=0; w < m_NumClasses; w++)	all[w] = missing[w] = 0;      for(int x=0; x<bag; x++){	coverage[x] = wSq[x] = wVl[x] = 0;	if(stats != null)	  for(int y=0; y < m_NumClasses; y++)	    stats[x][y] = 0;			splitData[x] = new Instances(data, data.numInstances());      }      splitData[bag] = new Instances(data, data.numInstances());	          // Record the statistics of data      for(int x=0; x<data.numInstances(); x++){	Instance inst=data.instance(x);	if(!inst.isMissing(att)){	  int v = (int)inst.value(att);	  splitData[v].add(inst);	  coverage[v] += inst.weight();	  if(m_ClassAttribute.isNominal()){ // Nominal class				    
stats[v][(int)inst.classValue()] += inst.weight();	    all[(int)inst.classValue()] += inst.weight();	    	  }	  else{                             // Numeric class	    wSq[v] += inst.weight() * inst.classValue() * inst.classValue();	    wVl[v] += inst.weight() * inst.classValue();	    totalWS += inst.weight() * inst.classValue() * inst.classValue();	    totalWV += inst.weight() * inst.classValue();	  }	}	else{	  splitData[bag].add(inst);	  if(m_ClassAttribute.isNominal()){ // Nominal class	    all[(int)inst.classValue()] += inst.weight();	    missing[(int)inst.classValue()] += inst.weight();	  }	  else{                            // Numeric class	    totalWS += inst.weight() * inst.classValue() * inst.classValue();	    totalWV += inst.weight() * inst.classValue();	    msingWS += inst.weight() * inst.classValue() * inst.classValue();	    msingWV += inst.weight() * inst.classValue();		 	  }	}      }	          // The total weights of the whole grow data      double whole;      if(m_ClassAttribute.isNominal())	whole = sum + Utils.sum(uncover);      else	whole = sum + uncoverSum;	        // Find the split        double minEntrp=Double.MAX_VALUE;      maxInfoGain = 0;	          // Check if >=2 splits have more than the minimal data      int count=0;      for(int x=0; x<bag; x++)	if(Utils.grOrEq(coverage[x], m_MinNo))		    	  ++count;	          if(count < 2){ // Don't split	maxInfoGain = 0;	inform = defInfo;	value = Double.NaN;	return null;      }	          for(int x=0; x<bag; x++){			double t = coverage[x], entrp, infoGain;	if(Utils.sm(t, m_MinNo))	  continue;			if(m_ClassAttribute.isNominal()){ // Nominal class	   	  double[] other = new double[m_NumClasses];	  for(int y=0; y < m_NumClasses; y++)	    other[y] = all[y] - stats[x][y] + uncover[y]; 	  double otherCover = whole - t;			    	  // Entropies of data covered and uncovered 		  entrp = entropy(stats[x], t);	  double uncEntp = entropy(other, otherCover);		    	  // Weighted average	  infoGain = defInfo - (entrp*t + 
uncEntp*otherCover)/whole;		   	}		else{                             // Numeric class	  double weight = (whole - t);	  entrp = wtMeanSqErr(wSq[x], wVl[x], t)/t;	  infoGain = defInfo - (entrp * t) - 	    wtMeanSqErr((totalWS-wSq[x]+uncoverWtSq),			(totalWV-wVl[x]+uncoverWtVl), 			weight);		  	}   					// Test the exclusive expression	boolean isWithin =true;			if(m_IsExclude){	  double infoGain2, entrp2;	  if(m_ClassAttribute.isNominal()){ // Nominal class		    double[] other2 = new double[m_NumClasses];	    double[] notIn = new double[m_NumClasses];	    for(int y=0; y < m_NumClasses; y++){	      other2[y] = stats[x][y] + missing[y] + uncover[y];	      notIn[y] = all[y] - stats[x][y] - missing[y];	    } 				    double msSum = Utils.sum(missing);	    double otherCover2 = t + msSum + Utils.sum(uncover);				    entrp2 = entropy(notIn, (sum-t-msSum));	    double uncEntp2 = entropy(other2, otherCover2);	    infoGain2 = defInfo - 	      (entrp2*(sum-t-msSum) + uncEntp2*otherCover2)/whole;	  }	  else{                             // Numeric class	    double msWts = splitData[bag].sumOfWeights();	    double weight2 = t + uncoverSum + msWts;				    entrp2 = wtMeanSqErr((totalWS-wSq[x]-msingWS),				 (totalWV-wVl[x]-msingWV),(sum-t-msWts))	      /(sum-t-msWts);	    infoGain2 = defInfo - entrp2 * (sum-t-msWts) -	      wtMeanSqErr((wSq[x]+uncoverWtSq+msingWS),			  (wVl[x]+uncoverWtVl+msingWV), 			  weight2);	  }		    	  // Use the exclusive expression?	  
if (Utils.gr(infoGain2, infoGain) ||	      (Utils.eq(infoGain2, infoGain) && Utils.sm(entrp2, entrp))){	    infoGain = infoGain2;	    entrp = entrp2;	    isWithin =false;	  }	}			// Test this split	if (Utils.gr(infoGain, maxInfoGain) ||	    (Utils.eq(infoGain, maxInfoGain) && Utils.sm(entrp, minEntrp))){	  value = (double)x;	  maxInfoGain = infoGain;	  inform = maxInfoGain - defInfo;	  minEntrp = entrp;	  isIn = isWithin;	}		      }	          return splitData;    }	    /**     * Whether the instance is covered by this antecedent     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance is covered      *         by this antecedent     */    public boolean isCover(Instance inst){	        boolean isCover=false;      if(!inst.isMissing(att)){	if(isIn){	  if(Utils.eq(inst.value(att), value))	    isCover=true;	}	else if(!Utils.eq(inst.value(att), value))	  isCover=true;      }      return isCover;    }	    /**     * Whether the expression is "att = value" or att != value"     * for this nominal attribute.  True if in the former expression,      * otherwise the latter     *      * @return the boolean value     */    public boolean isIn(){	       return isIn;    }	    /**     * Prints this antecedent     *     * @return a textual description of this antecedent     */    public String toString() {      String symbol = isIn ? " = " : " != ";	          return (att.name() + symbol + att.value((int)value));    }   }      /**   * Returns an enumeration describing the available options   * Valid options are: <p>   *   * -N number <br>   * Set number of folds for REP. One fold is   * used as the pruning set. (Default: 3) <p>   *   * -R <br>   * Set if NOT randomize the data before split to growing and    * pruning data. If NOT set, the seed of randomization is    * specified by the -S option. (Default: randomize) <p>   *    * -S <br>   * Seed of randomization. 
(Default: 1)<p>   *   * -E <br>   * Set whether consider the exclusive expressions for nominal   * attribute split. (Default: false) <p>   *   * -M number <br>   * Set the minimal weights of instances within a split.   * (Default: 2) <p>   *   * -P number <br>   * Set the number of antecedents allowed in the rule if pre-pruning   * is used.  If this value is other than -1, then pre-pruning will be   * used, otherwise the rule uses REP. (Default: -1) <p>   *   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector newVector = new Vector(6);	    newVector.addElement(new Option("\tSet number of folds for REP\n" +				    "\tOne fold is used as pruning set.\n" +				    "\t(default 3)","N", 1, "-N <number of folds>"));	    newVector.addElement(new Option("\tSet if NOT uses randomization\n" +				    "\t(default:use randomization)","R", 0, "-R"));    newVector.addElement(new Option("\tSet whether consider the exclusive\n" +				    "\texpressions for nominal attributes\n"+				    "\t(default false)","E", 0, "-E"));	    newVector.addElement(new Option("\tSet the minimal weights of instances\n" +				    "\twithin a split.\n" +				    "\t(default 2.0)","M", 1, "-M <min. weights>"));        newVector.addElement(new Option("\tSet number of antecedents for pre-pruning\n" +				    "\tif -1, then REP is used\n" +				    "\t(default -1)","P", 1, "-P <number of antecedents>"));        newVector.addElement(new Option("\tSet the seed of randomization\n" +				    "\t(default 1)","S", 1, "-S <seed>"));        return newVector.elements();  }      /**   * Parses a given list of options.   
*
   * Options that are not present are reset to their defaults
   * (-N 3, -M 2.0, -S 1, -P -1, randomization on, exclusive off).
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported or a numeric
   *            option value cannot be parsed
   */
  public void setOptions(String[] options) throws Exception {
    // -N: number of folds for REP
    String numFoldsString = Utils.getOption('N', options);
    m_Folds = (numFoldsString.length() != 0)
      ? Integer.parseInt(numFoldsString)
      : 3;

    // -M: minimal weights of instances within a split
    String minNoString = Utils.getOption('M', options);
    m_MinNo = (minNoString.length() != 0)
      ? Double.parseDouble(minNoString)
      : 2.0;

    // -S: seed of randomization.  The seed is a long (see setSeed/getSeed),
    // so it is parsed as a long; parsing it as an int rejected seed values
    // that getOptions() can legitimately produce.
    String seedString = Utils.getOption('S', options);
    m_Seed = (seedString.length() != 0)
      ? Long.parseLong(seedString)
      : 1;

    // -P: number of antecedents for pre-pruning, -1 means REP is used
    String numAntdsString = Utils.getOption('P', options);
    m_NumAntds = (numAntdsString.length() != 0)
      ? Integer.parseInt(numAntdsString)
      : -1;

    // -R switches randomization OFF, -E switches exclusive expressions ON
    m_IsRandomized = (!Utils.getFlag('R', options));
    m_IsExclude = Utils.getFlag('E', options);
  }

  /**
   * Gets the current settings of the Classifier.
*
   * The array always has ten entries; unused trailing entries are
   * empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
    String[] settings = new String[10];

    // Start with every slot blank so the unused tail needs no padding pass
    for (int i = 0; i < settings.length; i++) {
      settings[i] = "";
    }

    // The four valued options always occupy the first eight slots
    settings[0] = "-N";
    settings[1] = String.valueOf(m_Folds);
    settings[2] = "-M";
    settings[3] = String.valueOf(m_MinNo);
    settings[4] = "-P";
    settings[5] = String.valueOf(m_NumAntds);
    settings[6] = "-S";
    settings[7] = String.valueOf(m_Seed);

    // The two flags follow only when they are switched on
    int next = 8;
    if (!m_IsRandomized) {
      settings[next++] = "-R";
    }
    if (m_IsExclude) {
      settings[next++] = "-E";
    }

    return settings;
  }

  /**
   * Sets the number of folds for REP.
   *
   * @param folds the number of folds
   */
  public void setFolds(int folds){
    m_Folds = folds;
  }

  /**
   * Gets the number of folds for REP.
   *
   * @return the number of folds
   */
  public int getFolds(){
    return m_Folds;
  }

  /**
   * Sets the seed of randomization.
   *
   * @param s the seed
   */
  public void setSeed(long s){
    m_Seed = s;
  }

  /**
   * Gets the seed of randomization.
   *
   * @return the seed
   */
  public long getSeed(){
    return m_Seed;
  }

  /**
   * Gets whether the data is randomized.
   *
   * @return true if randomization is switched on
   */
  public boolean getRandomized(){
    return m_IsRandomized;
  }

  /**
   * Sets whether the data is randomized.
   *
   * @param r true to switch randomization on
   */
  public void setRandomized(boolean r){
    m_IsRandomized = r;
  }

  /**
   * Gets whether exclusive expressions are considered for nominal
   * attributes.
   *
   * @return true if exclusive expressions are considered
   */
  public boolean getExclusive(){
    return m_IsExclude;
  }

  /**
   * Sets whether exclusive expressions are considered for nominal
   * attributes.
   *
   * @param e true to consider exclusive expressions
   */
  public void setExclusive(boolean e){
    m_IsExclude = e;
  }

  /**
   * Sets the minimal weights of instances within a split.
   *
   * @param m the minimal weights
   */
  public void setMinNo(double m){
    m_MinNo = m;
  }

  /**
   * Gets the minimal weights of instances within a split.
   *
   * @return the minimal weights
   */
  public double getMinNo(){
    return m_MinNo;
  }

  /**
   * Sets the number of antecedents for pre-pruning (-1 means REP is used).
   *
   * @param n the number of antecedents
   */
  public void setNumAntds(int n){
    m_NumAntds = n;
  }

  /**
   * Gets the number of antecedents for pre-pruning (-1 means REP is used).
   *
   * @return the number of antecedents
   */
  public int getNumAntds(){
    return m_NumAntds;
  }

  /**
   * Builds a single rule learner with REP dealing with nominal classes or
   * numeric classes.
   * For nominal classes, this rule learner predicts a distribution on
   * the classes.
   * For numeric classes, this learner predicts a single value.
*   * @param instances the training data   * @exception Exception if classifier can't be built successfully   */  public void buildClassifier(Instances instances) throws Exception {    if (instances.checkForStringAttributes())      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");	     Instances data = new Instances(instances);    if(data.numInstances() == 0)	throw new Exception("No training data!");    data.deleteWithMissingClass();        if(data.numInstances() == 0)	throw new Exception("Not training data without missing class values.");    if(data.numInstances() < m_Folds)      throw new Exception("Not enough data for REP.");    m_ClassAttribute = data.classAttribute();    if(m_ClassAttribute.isNominal())      m_NumClasses = m_ClassAttribute.numValues();    else      m_NumClasses = 1;	    m_Antds = new FastVector();    m_DefDstr = new double[m_NumClasses];    m_Cnsqt = new double[m_NumClasses];    m_Targets = new FastVector();	        m_Random = new Random(m_Seed);        if(m_IsRandomized){  // Randomize the data		data.randomize(m_Random);    }        if(m_NumAntds != -1){      grow(data);    }    else{      // Split data into Grow and Prune	         data.stratify(m_Folds);	      Instances growData=data.trainCV(m_Folds, m_Folds-1);      Instances pruneData=data.testCV(m_Folds, m_Folds-1);      grow(growData);      // Build this rule        prune(pruneData);    // Prune this rule		  	      }	    if(m_ClassAttribute.isNominal()){			         Utils.normalize(m_Cnsqt);      if(Utils.gr(Utils.sum(m_DefDstr), 0))	Utils.normalize(m_DefDstr);    }	  }      /**   * Computes class distribution for the given instance.   
*
   * The consequent distribution is returned when the rule covers the
   * instance, the default distribution otherwise.
   *
   * @param instance the instance for which distribution is to be computed
   * @return the class distribution for the given instance
   * @exception Exception if the instance is null
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    if (instance == null) {
      throw new Exception("Testing instance is NULL!");
    }

    return isCover(instance) ? m_Cnsqt : m_DefDstr;
  }

  /**
   * Whether the instance covered by this rule
   * 
   * @param inst the instance in question
   * @return the boolean value indicating whether the instance is covered by this rule
   */
  public boolean isCover(Instance datum){
    boolean isCover=true;

    for(int i=0; i<m_Antds.size(); i++){
      Antd antd = (Antd)m_Antds.elementAt(i);
      if(!antd.isCover(datum)){
	isCover = false;
	break;
      }
    }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -