⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qboost.java

📁 WekaUT 是 University of Texas at Austin 开发的、基于 Weka 的半监督学习（semi-supervised learning）分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
  /**   * Get whether resampling is turned on   *   * @return true if resampling output is on   */  public boolean getUseResampling() {    return m_UseResampling;  }  /**   * Boosting method.   *   * @param data the training data to be used for generating the   * boosted classifier.   * @exception Exception if the classifier could not be built successfully   */  public void buildClassifier(Instances data) throws Exception {            //Initialize measures      initMeasures();          if (data.checkForStringAttributes()) {      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");    }    data = new Instances(data);    data.deleteWithMissingClass();    if (data.numInstances() == 0) {      throw new Exception("No train instances without class missing!");    }    if (data.classAttribute().isNumeric()) {      throw new UnsupportedClassTypeException("QBoost can't handle a numeric class!");    }    if (m_Classifier == null) {      throw new Exception("A base classifier has not been specified!");    }    m_NumClasses = data.numClasses();    m_Classifiers = Classifier.makeCopies(m_Classifier, getMaxIterations());    if ((!m_UseResampling) && 	(m_Classifier instanceof WeightedInstancesHandler)) {      buildClassifierWithWeights(data);    } else {      buildClassifierUsingResampling(data);    }    //=============== BEGIN EDIT melville ===============    m_EnsembleWts = new double [m_NumIterations];    if(m_NumIterations==1)	m_EnsembleWts[0] = 1.0;    else{	for(int i=0; i<m_NumIterations; i++)	    m_EnsembleWts[i] =  m_Betas[i];    }     computeEnsembleMeasures(data);    //=============== END EDIT melville ===============  }  /**   * Boosting method. Boosts using resampling   *   * @param data the training data to be used for generating the   * boosted classifier.   
* @exception Exception if the classifier could not be built successfully   */  protected void buildClassifierUsingResampling(Instances data)     throws Exception {    Instances trainData, sample, training;    double epsilon, reweight, beta = 0, sumProbs;    double oldSumOfWeights, newSumOfWeights;    Evaluation evaluation;    int numInstances = data.numInstances();    Random randomInstance = new Random(m_Seed);    double[] probabilities;    int resamplingIterations = 0;    int k, l;    // Initialize data    m_Betas = new double [m_Classifiers.length];    m_NumIterations = 0;    // Create a copy of the data so that when the weights are diddled    // with it doesn't mess up the weights for anyone else    training = new Instances(data, 0, numInstances);    sumProbs = training.sumOfWeights();    for (int i = 0; i < training.numInstances(); i++) {      training.instance(i).setWeight(training.instance(i).				      weight() / sumProbs);    }        // Do boostrap iterations    for (m_NumIterations = 0; m_NumIterations < m_Classifiers.length; 	 m_NumIterations++) {      if (m_Debug) {	System.err.println("Training classifier " + (m_NumIterations + 1));      }      // Select instances to train the classifier on      if (m_WeightThreshold < 100) {	trainData = selectWeightQuantile(training, 					 (double)m_WeightThreshold / 100);      } else {	trainData = new Instances(training);      }            // Resample      resamplingIterations = 0;      double[] weights = new double[trainData.numInstances()];      for (int i = 0; i < weights.length; i++) {	weights[i] = trainData.instance(i).weight();      }      do {	sample = trainData.resampleWithWeights(randomInstance, weights);	// Build and evaluate classifier	m_Classifiers[m_NumIterations].buildClassifier(sample);	evaluation = new Evaluation(data);	evaluation.evaluateModel(m_Classifiers[m_NumIterations], 				 training);	epsilon = evaluation.errorRate();	resamplingIterations++;      } while (Utils.eq(epsilon, 0) && 	      
(resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));      	      // Stop if error too big or 0      if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {	if (m_NumIterations == 0) {	  m_NumIterations = 1; // If we're the first we have to to use it	}	break;      }            // Determine the weight to assign to this model      m_Betas[m_NumIterations] = beta = Math.log((1 - epsilon) / epsilon);      reweight = (1 - epsilon) / epsilon;      if (m_Debug) {	System.err.println("\terror rate = " + epsilon			   +"  beta = " + m_Betas[m_NumIterations]);      }       // Update instance weights      oldSumOfWeights = training.sumOfWeights();      Enumeration enum = training.enumerateInstances();      while (enum.hasMoreElements()) {	Instance instance = (Instance) enum.nextElement();	if (!Utils.eq(m_Classifiers[m_NumIterations].classifyInstance(instance), 		     instance.classValue()))	  instance.setWeight(instance.weight() * reweight);      }      // Renormalize weights      newSumOfWeights = training.sumOfWeights();      enum = training.enumerateInstances();      while (enum.hasMoreElements()) {	Instance instance = (Instance) enum.nextElement();	instance.setWeight(instance.weight() * oldSumOfWeights 			   / newSumOfWeights);      }    }  }  /**   * Boosting method. Boosts any classifier that can handle weighted   * instances.   *   * @param data the training data to be used for generating the   * boosted classifier.   
* @exception Exception if the classifier could not be built successfully   */  protected void buildClassifierWithWeights(Instances data)     throws Exception {    Instances trainData, training;    double epsilon, reweight, beta = 0;    double oldSumOfWeights, newSumOfWeights;    Evaluation evaluation;    int numInstances = data.numInstances();    // Initialize data    m_Betas = new double [m_Classifiers.length];    m_NumIterations = 0;    // Create a copy of the data so that when the weights are diddled    // with it doesn't mess up the weights for anyone else    training = new Instances(data, 0, numInstances);        // Do boostrap iterations    for (m_NumIterations = 0; m_NumIterations < m_Classifiers.length; 	 m_NumIterations++) {      if (m_Debug) {	System.err.println("Training classifier " + (m_NumIterations + 1));      }      // Select instances to train the classifier on      if (m_WeightThreshold < 100) {	trainData = selectWeightQuantile(training, 					 (double)m_WeightThreshold / 100);      } else {	trainData = new Instances(training, 0, numInstances);      }      // Build the classifier      m_Classifiers[m_NumIterations].buildClassifier(trainData);      // Evaluate the classifier      evaluation = new Evaluation(data);      evaluation.evaluateModel(m_Classifiers[m_NumIterations], training);      epsilon = evaluation.errorRate();      // Stop if error too small or error too big and ignore this model      if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {	  if (m_NumIterations == 0) {	  m_NumIterations = 1; // If we're the first we have to to use it	}	break;      }      // Determine the weight to assign to this model      m_Betas[m_NumIterations] = beta = Math.log((1 - epsilon) / epsilon);      reweight = (1 - epsilon) / epsilon;      if (m_Debug) {	System.err.println("\terror rate = " + epsilon			   +"  beta = " + m_Betas[m_NumIterations]);      }       // Update instance weights      oldSumOfWeights = training.sumOfWeights();      Enumeration enum 
= training.enumerateInstances();      while (enum.hasMoreElements()) {	Instance instance = (Instance) enum.nextElement();	if (!Utils.eq(m_Classifiers[m_NumIterations]		      .classifyInstance(instance), 		      instance.classValue()))	  instance.setWeight(instance.weight() * reweight);      }      // Renormalize weights      newSumOfWeights = training.sumOfWeights();      enum = training.enumerateInstances();      while (enum.hasMoreElements()) {	Instance instance = (Instance) enum.nextElement();	instance.setWeight(instance.weight() * oldSumOfWeights			   / newSumOfWeights);      }    }  }    /**   * Calculates the class membership probabilities for the given test instance.   *   * @param instance the instance to be classified   * @return predicted class probability distribution   * @exception Exception if instance could not be classified   * successfully   */  public double [] distributionForInstance(Instance instance)     throws Exception {      if (m_NumIterations == 0) {      throw new Exception("No model built");    }    double [] sums = new double [instance.numClasses()];         if (m_NumIterations == 1) {      if (m_Classifiers[0] instanceof DistributionClassifier) {	return ((DistributionClassifier)m_Classifiers[0]).	distributionForInstance(instance);      } else {	sums[(int)m_Classifiers[0].classifyInstance(instance)] ++;      }    } else {      for (int i = 0; i < m_NumIterations; i++) {	sums[(int)m_Classifiers[i].classifyInstance(instance)] += 	m_Betas[i];      }    }    Utils.normalize(sums);    return sums;  }    /**      * Given a set of unlabeled examples, select a specified number of examples to be labeled.     
* @param unlabeledActivePool pool of unlabeled examples     * @param num number of examples to selcted for labeling     * @exception Exception if selective sampling fails     */    public int [] selectInstances(Instances unlabeledActivePool,int num) throws Exception{	//Make a list of pairs of indices and the corresponding measure of informativenes of examples	//Sort this in the order of informativeness and return the list of num indices	int poolSize = unlabeledActivePool.numInstances();	Pair []pairs = new Pair[poolSize];	for(int i=0; i<poolSize; i++){	    pairs[i] = new Pair(i,calculateMargin(unlabeledActivePool.instance(i)));	}	//sort in ascending order	Arrays.sort(pairs, new Comparator() {                public int compare(Object o1, Object o2) {		    double diff = ((Pair)o2).second - ((Pair)o1).second; 		    return(diff < 0 ? 1 : diff > 0 ? -1 : 0);		}            });	int []selected = new int[num];	if(m_Debug) System.out.println("Sorted list:");	for(int j=0; j<num; j++){	    if(m_Debug) System.out.println("\t"+pairs[j].second+"\t"+pairs[j].first);	    selected[j] = (int) pairs[j].first;	}	return selected;    }        //=============== BEGIN EDIT melville ===============    /** Returns class predictions of each ensemble member */    public double []getEnsemblePredictions(Instance instance) throws Exception{	double preds[] = new double [m_NumIterations];	for(int i=0; i<m_NumIterations; i++)	    preds[i] = m_Classifiers[i].classifyInstance(instance);		return preds;    }        /**      * Returns vote weights of ensemble members.     *     * @return vote weights of ensemble members     */    public double []getEnsembleWts(){	return m_EnsembleWts;    }        /** Returns size of ensemble */    public double getEnsembleSize(){	return m_NumIterations;    }    //=============== END EDIT melville ===============       /**   * Returns the boosted model as Java source code.   
*   * @return the tree as Java source code   * @exception Exception if something goes wrong   */  public String toSource(String className) throws Exception {    if (m_NumIterations == 0) {      throw new Exception("No model built yet");    }    if (!(m_Classifiers[0] instanceof Sourcable)) {      throw new Exception("Base learner " + m_Classifier.getClass().getName()			  + " is not Sourcable");    }    StringBuffer text = new StringBuffer("class ");    text.append(className).append(" {\n\n");    text.append("  public static double classify(Object [] i) {\n");    if (m_NumIterations == 1) {      text.append("    return " + className + "_0.classify(i);\n");    } else {      text.append("    double [] sums = new double [" + m_NumClasses + "];\n");      for (int i = 0; i < m_NumIterations; i++) {	text.append("    sums[(int) " + className + '_' + i 		    + ".classify(i)] += " + m_Betas[i] + ";\n");      }      text.append("    double maxV = sums[0];\n" +		  "    int maxI = 0;\n"+		  "    for (int j = 1; j < " + m_NumClasses + "; j++) {\n"+		  "      if (sums[j] > maxV) { maxV = sums[j]; maxI = j; }\n"+		  "    }\n    return (double) maxI;\n");    }    text.append("  }\n}\n");    for (int i = 0; i < m_Classifiers.length; i++) {	text.append(((Sourcable)m_Classifiers[i])		    .toSource(className + '_' + i));    }    return text.toString();  }  /**   * Returns description of the boosted classifier.   
*
   * @return description of the boosted classifier as a string
   */
  public String toString() {

    StringBuffer out = new StringBuffer();

    switch (m_NumIterations) {
    case 0:
      out.append("QBoost: No model built yet.\n");
      break;

    case 1:
      out.append("QBoost: No boosting possible, one classifier used!\n");
      out.append(m_Classifiers[0].toString()).append("\n");
      break;

    default:
      // One entry per ensemble member followed by its rounded weight
      out.append("QBoost: Base classifiers and their weights: \n\n");
      for (int member = 0; member < m_NumIterations; member++) {
        out.append(m_Classifiers[member].toString()).append("\n\n");
        out.append("Weight: ")
           .append(Utils.roundDouble(m_Betas[member], 2))
           .append("\n\n");
      }
      out.append("Number of performed Iterations: ")
         .append(m_NumIterations)
         .append("\n");
      break;
    }

    return out.toString();
  }

  /**
   * Main method for testing this class. Prints the result of
   * Evaluation.evaluateModel for a new QBoost, or the exception message
   * on failure.
   *
   * @param argv the options
   */
  public static void main(String [] argv) {

    try {
      String report = Evaluation.evaluateModel(new QBoost(), argv);
      System.out.println(report);
    } catch (Exception e) {
      System.err.println(e.getMessage());
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -