⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 activedecorate.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
	//Compute cumulative probabilities 	cdf[0] = invProbs[0];	for(int i=1; i<invProbs.length; i++){	    cdf[i] = invProbs[i]+cdf[i-1];	}		if(Double.isNaN(cdf[invProbs.length-1]))	    System.err.println("Cumulative class membership probability is NaN!"); 	return selectIndexProbabilistically(cdf);    }        /**      * Given cumulative probabilities select a nominal attribute value index      *     * @param cdf array of cumulative probabilities     * @return index of attribute selected based on the probability distribution      */    protected int selectIndexProbabilistically(double []cdf){	double rnd = m_Random.nextDouble();	int index = 0;	while(index < cdf.length && rnd > cdf[index]){	    index++;	}	return index;    }         /**     * Removes a specified number of instances from the given set of instances.     *     * @param data given instances     * @param numRemove number of instances to delete from the given instances     */    protected void removeInstances(Instances data, int numRemove){	int num = data.numInstances();	for(int i=num - 1; i>num - 1 - numRemove;i--){	    data.delete(i);	}    }        /**     * Add new instances to the given set of instances.     *     * @param data given instances     * @param newData set of instances to add to given instances     */    protected void addInstances(Instances data, Instances newData){	for(int i=0; i<newData.numInstances(); i++)	    data.add(newData.instance(i));    }        /**      * Computes the error in classification on the given data.     
*     * @param data the instances to be classified     * @return classification error     * @exception Exception if error can not be computed successfully     */    protected double computeError(Instances data) throws Exception {	double error = 0.0;	int numInstances = data.numInstances();	Instance curr;		for(int i=0; i<numInstances; i++){	    curr = data.instance(i);	    //Check if the instance has been misclassified	    if(curr.classValue() != ((int) classifyInstance(curr))) error++;	}	return (error/numInstances);    }      /**   * Calculates the class membership probabilities for the given test instance.   *   * @param instance the instance to be classified   * @return predicted class probability distribution   * @exception Exception if distribution can't be computed successfully   */  public double[] distributionForInstance(Instance instance) throws Exception {      if (instance.classAttribute().isNumeric()) {	  throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");      }      double [] sums = new double [instance.numClasses()], newProbs;       Classifier curr;            for (int i = 0; i < m_Committee.size(); i++) {	  curr = (Classifier) m_Committee.get(i);	  if (curr instanceof DistributionClassifier) {	      newProbs = ((DistributionClassifier)curr).distributionForInstance(instance);	      for (int j = 0; j < newProbs.length; j++)		  sums[j] += newProbs[j];	  } else {	      sums[(int)curr.classifyInstance(instance)]++;	  }      }      if (Utils.eq(Utils.sum(sums), 0)) {	  return sums;      } else {	  Utils.normalize(sums);	  return sums;      }  }              /**      * Given a set of unlabeled examples, select a specified number of examples to be labeled.     
* @param unlabeledActivePool pool of unlabeled examples     * @param num number of examples to selcted for labeling     * @exception Exception if selective sampling fails     */    public int [] selectInstances(Instances unlabeledActivePool,int num) throws Exception{	//Make a list of pairs of indices and the corresponding measure of informativenes of examples	//Sort this in the order of informativeness and return the list of num indices	int poolSize = unlabeledActivePool.numInstances();	Pair []pairs = new Pair[poolSize];	for(int i=0; i<poolSize; i++){	    pairs[i] = new Pair(i,calculateDisagreement(unlabeledActivePool.instance(i)));	}	//sort in descending order	Arrays.sort(pairs, new Comparator() {                public int compare(Object o1, Object o2) {		    double diff = ((Pair)o1).second - ((Pair)o2).second; 		    return(diff < 0 ? 1 : diff > 0 ? -1 : 0);		}            });	int []selected = new int[num];	if(m_Debug) System.out.println("Sorted list:");	for(int j=0; j<num; j++){	    if(m_Debug) System.out.println("\t"+pairs[j].second+"\t"+pairs[j].first);	    selected[j] = (int) pairs[j].first;	}	return selected;    }        /**     * Calculate the disagreement in the ensemble over the label of     * given examples depending on the chosen selection scheme.     
* @param instance unlabeled instance from the current pool     * @return nomalized measure of disagreement     * @exception Exception if disagreement could not be calculated properly */    protected double calculateDisagreement(Instance instance) throws Exception{	double disagreement;	switch(m_SelectionScheme){	case JENSEN_SHANNON:	    disagreement = calcJSDivergence(instance);	    break;	case MAJORITY:	    disagreement = calcMajorityDis(instance);	    break;	case EUCLIDEAN:	    disagreement = calcEuclideanDis(instance);	    break;	case MARGIN:	    //negate margins so that the sort ordering does not need to be changed	    disagreement = -1.0 * calculateMargin(instance);	    break;	case BAGGING:	    disagreement = -1.0 * m_SelectionCommittee.calculateMargin(instance);	    break;	case BOOSTING:	    disagreement = -1.0 * m_SelectionCommittee.calculateMargin(instance);	    break;	default:	    disagreement = calcMajorityDis(instance);	}	return disagreement;    }            /**     * Calculate the disagreement in the ensemble over the label of     * given examples.  The disagreement is calculated between the     * posterior probabilities of each member classifier and those of     * the ensemble.     
* @param instance unlabeled instance from the current pool     * @return nomalized measure of disagreement     * @exception Exception if disagreement could not be calculated properly     */    protected double calcJSDivergence(Instance instance) throws Exception{	if (!(m_Classifier instanceof DistributionClassifier)) 	    System.err.println("JS Divergence can only be applied to DistributionClassifiers.");	//if(m_Debug) System.out.println("Using JS Divergence.");	int size = m_Committee.size();	double [][]probs = new double [size][];	double [] avg = new double [instance.numClasses()];	Classifier curr;	for (int i = 0; i < m_Committee.size(); i++) {	    curr = (Classifier) m_Committee.get(i);	    probs[i] = ((DistributionClassifier)curr).distributionForInstance(instance);	    smoothDistribution(probs[i]);	    for (int j = 0; j < avg.length; j++)		avg[j] += probs[i][j];	}	Utils.normalize(avg);		double disagreement = 0.0;	for(int i=0; i<size; i++){	    disagreement += calcKLdivergence(probs[i], avg);	}	disagreement = disagreement/m_Committee.size();	return disagreement;    }              //Smooth given probability distribution to get rid of zero values    protected void smoothDistribution(double []probs){	for(int i=0; i<probs.length; i++)	    if(probs[i]==0) probs[i] = m_Epsilon;	Utils.normalize(probs);    }        /**     * Calculate the KL divergence between two probability distributions.     * @param p1 first probability disttribution     * @param p1 second probability disttribution     * @return the KL divergence between p1 and p2 */    protected double calcKLdivergence(double []p1, double []p2){	double kl = 0.0;	for(int i=0; i<p1.length; i++){	    kl += p1[i]*Math.log(p2[i]/p1[i]);	}	kl = -1.0 * kl;	return kl;    }        /**     * Calculate the disagreement in the ensemble over the label of     * given examples.  
The disagreement is calculated using the     * Jensen-Shannon divergence of the posterior probabilities     * @param instance unlabeled instance from the current pool     * @return nomalized measure of disagreement      * @exception Exception if disagreement could not be calculated properly     */    protected double calcEuclideanDis(Instance instance) throws Exception{	if (!(m_Classifier instanceof DistributionClassifier)) 	    System.err.println("Euclidean disagreement can only be applied to DistributionClassifiers.");	//if(m_Debug) System.out.println("Using Euclidean disagreement.");	double disagreement = 0.0;	double []pred = distributionForInstance(instance);//ensemble decision	Classifier curr;	for (int i = 0; i < m_Committee.size(); i++) {	    curr = (Classifier) m_Committee.get(i);	    double sum = 0.0; 	    double []newProbs = ((DistributionClassifier)curr).distributionForInstance(instance);	    for (int j = 0; j < newProbs.length; j++)		sum += Math.pow((newProbs[j]-pred[j]),2);	    disagreement += Math.sqrt(sum);	}	disagreement = disagreement/m_Committee.size();	//This normalization step is not necessary for selection, but	//is useful for comparing disagreement values for different	//points on the learning curves (as committee size can change)	return disagreement;    }        /**     * Calculate the disagreement in the ensemble over the label of given examples.     
* @param instance unlabeled instance from the current pool     * @return nomalized measure of disagreement     * @exception Exception if disagreement could not be calculated properly     */    protected double calcMajorityDis(Instance instance) throws Exception{	//if(m_Debug) System.out.println("Using majority vote disagreement.");	double disagreement = 0.0;	double pred = classifyInstance(instance);//ensemble decision	Classifier curr;	for (int i = 0; i < m_Committee.size(); i++) {	  curr = (Classifier) m_Committee.get(i);	  if(curr.classifyInstance(instance) != pred) disagreement++;	}	disagreement = disagreement/m_Committee.size();	//This normalization step is not necessary for selection, but	//is useful for comparing disagreement values for different	//points on the learning curves (as committee size can change)	return disagreement;    }         /**     * Returns description of the Decorate classifier.     *     * @return description of the Decorate classifier as a string     */    public String toString() {	if (m_Committee == null) {	    return "Decorate: No model built yet.";	}	StringBuffer text = new StringBuffer();	text.append("Decorate base classifiers: \n\n");	for (int i = 0; i < m_Committee.size(); i++)	    text.append(((Classifier) m_Committee.get(i)).toString() + "\n\n");	text.append("Number of classifier in the ensemble: "+m_Committee.size()+"\n");	return text.toString();    }            /**     * Main method for testing this class.     *     * @param argv the options     */    public static void main(String [] argv) {	try {	    System.out.println(Evaluation.evaluateModel(new ActiveDecorate(), argv));	} catch (Exception e) {	    System.err.println(e.getMessage());	}    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -