📄 blue.java
// [Listing begins mid-file: the class declaration, imports, and the start of
//  the query-selection method are not shown on this page.]

        }
      }
      train.setClassIndex(origClassIndex); // reset class index
    }

    double []probs = null;
    Pair []pairs = new Pair[allQueries.size()];
    for (int i = 0; i < allQueries.size(); i++) {
      Pair curr = (Pair) allQueries.get(i);
      Instance instance = train.instance((int) curr.first);
      int featureIndex = (int) curr.second;
      if (!m_UseNaiveBayes) {
        train.setClassIndex(featureIndex);
        probs = ((DistributionClassifier) featurePredictors[featureIndex])
                  .distributionForInstance(instance);
        train.setClassIndex(origClassIndex); // reset class index
      }
      // Try this out with
      // 1) uniform priors,
      // 2) probabilities estimated from the training data,
      // 3) and Laplace smoothing
      double score = computeUtility(instance, featureIndex, probs, train, currentMeasure);
      pairs[i] = new Pair(i, score); // associate the score with the query
    }

    // sort in descending order of score
    Arrays.sort(pairs, new Comparator() {
        public int compare(Object o1, Object o2) {
          double diff = ((Pair) o1).second - ((Pair) o2).second;
          return (diff < 0 ? 1 : diff > 0 ? -1 : 0);
        }
      });

    if (m_UseWeightedSampling) { // use probabilistic selection of queries
      pairs = sampleWithWeights(pairs, num);
    } // else select the top num queries

    if (m_Debug) System.out.println("Selected list:");
    for (int j = 0; j < num; j++) {
      if (m_Debug) System.out.println("\t" + pairs[j].second + "\t" + pairs[j].first);
      queries[j] = (Pair) allQueries.get((int) pairs[j].first);
    }
    return queries;
  }

  /**
   * Sample from a distribution based on assigned scores.
   *
   * @param pairs array of pairs of object and score
   * @param num number of objects to select
   **/
  protected Pair []sampleWithWeights(Pair []pairs, int num) {
    // convert the array of pairs to a vector
    int poolSize = pairs.length;
    Vector v = new Vector(poolSize);
    for (int i = 0; i < poolSize; i++)
      v.add(pairs[i]);
    return sampleWithWeights(v, num);
  }

  /**
   * Sample from a distribution based on assigned scores.
   *
   * @param v vector of pairs of object and score
   * @param num number of objects to select
   **/
  protected Pair []sampleWithWeights(Vector v, int num) {
    int poolSize = v.size();

    // Assumes the list is in descending order of scores.
    // Shift the range to account for any negative values.
    double min = ((Pair) v.get(poolSize - 1)).second;
    Pair curr;
    if (min < 0) {
      for (int i = 0; i < poolSize; i++) {
        curr = (Pair) v.get(i);
        curr.second = curr.second - min;
      }
    }

    Pair []selected = new Pair[num];
    double sum;
    /* For j = 1 to n:
     *   create a cdf,
     *   randomly pick an instance based on the cdf,
     *   note its index and remove the element.
     */
    for (int j = 0; j < num; j++) {
      sum = 0;
      for (int i = 0; i < v.size(); i++)
        sum += ((Pair) v.get(i)).second;

      // normalize
//      if (Double.isNaN(sum)) {
//        for (int i = 0; i < v.size(); i++)
//          System.err.print(((Pair) v.get(i)).second + " ");
//        System.err.println();
//        throw new IllegalArgumentException("Can't normalize array. Sum is NaN. Sum = " + sum);
//      }
//      if (sum == 0) {
      if (sum == 0 || Double.isNaN(sum)) {
        System.err.println("Sum = " + sum + ", setting to uniform weights.");
        // set probabilities for uniform selection
        double uniform = 1.0 / v.size();
        for (int i = 0; i < v.size(); i++)
          ((Pair) v.get(i)).second = uniform;
        sum = 1.0;
      } else {
        for (int i = 0; i < v.size(); i++)
          ((Pair) v.get(i)).second = ((Pair) v.get(i)).second / sum;
      }

      // create a cdf
      double []cdf = new double[v.size()];
      cdf[0] = ((Pair) v.get(0)).second;
      for (int i = 1; i < v.size(); i++)
        cdf[i] = ((Pair) v.get(i)).second + cdf[i - 1];

      double rnd = m_Random.nextDouble();
      int index = 0;
      while (index < cdf.length && rnd > cdf[index]) {
        index++;
      }
      selected[j] = (Pair) v.get(index);
      v.remove(index);
    }
    assert v.size() + num == poolSize : v.size() + " + " + num + " != " + poolSize + "\n";
    return selected;
  }

//  // randomly shuffle the given list
//  protected void shuffle(ArrayList list) {
//    System.out.println("Doing the shuffle...");
//    Random random = new Random(m_Seed);
//    Object obj;
//    int loc;
//    for (int j = list.size() - 1; j > 0; j--) {
//      // swap objects
//      obj = list.get(j);
//      loc = random.nextInt(j + 1);
//      list.set(j, list.get(loc));
//      list.set(loc, obj);
//    }
//  }

  /**
   * Compute the utility of the instance-feature pair.
   * Expected accuracy: Acc_{t+1} = Sigma_i (P(Fj = i) * Acc(M(Fj = i)))
   * Score = (Acc_{t+1} - Acc_t) / Cost_of_Fj
   * The score can be computed for measures other than accuracy, e.g. entropy.
   *
   * @param instance instance under consideration
   * @param featureIndex feature under consideration
   * @param probs predicted probability of each feature-value for the instance
   * @param train training set over which utility is measured
   * @param currentMeasure the accuracy/entropy of the current model
   */
  protected double computeUtility(Instance instance, int featureIndex,
                                  double []probs, Instances train,
                                  double currentMeasure) throws Exception {
    // For each feature-value with a non-zero probability, generate a classifier,
    // measure its accuracy, and compute the score as the expected accuracy.
    double sum = 0.0;
    int numValues = train.attribute(featureIndex).numValues();
    Classifier classifier;
    Evaluation eval;
    double utility;
    // Assumes that probs is actually a distribution, i.e. adds up to 1.0
    for (int i = 0; i < numValues; i++) {
      if (probs == null || probs[i] != 0) {
        Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1);
        classifier = tmp[0];
        instance.setValue(featureIndex, i);
        classifier.buildClassifier(train); // train classifier assuming the current value for the feature
        instance.setMissing(featureIndex); // reset feature to be missing
        // DEBUG should handle Evaluation(train, costMatrix)
        if (m_Policy == EXPECTED_UTILITY_ENTROPY || m_Policy == HBL_ENTROPY) {
          // if (m_Debug) System.out.println("Using entropy...");
          // compute the expected entropy
          eval = new Evaluation(train);
          eval.evaluateModel(classifier, train);
          utility = -1 * eval.SFMeanSchemeEntropy();
        } else {
          // compute expected accuracy
          utility = computeAccuracy(classifier, train);
          // accuracy = eval.pctCorrect();
        }
        if (m_UseNaiveBayes) {
          sum += m_Distributions[featureIndex][(int) instance.classValue()].getProbability(i) * utility;
        } else {
          sum += probs[i] * utility;
        }
      }
    }
    return ((sum - currentMeasure) / m_FeatureCosts[featureIndex]);
  }

  // Compute the current model's accuracy/entropy on the training set
  protected double computeCurrentMeasure(Instances train) throws Exception {
    Evaluation eval;
    double measure = 0.0;
    if (m_Policy == EXPECTED_UTILITY_ENTROPY || m_Policy == HBL_ENTROPY) {
      // if (m_Debug) System.out.println("Using entropy...");
      // compute the current (negative) entropy
      eval = new Evaluation(train);
      eval.evaluateModel(m_Classifier, train);
      measure = -1 * eval.SFMeanSchemeEntropy();
    } else {
      // compute expected accuracy
      measure = computeAccuracy(m_Classifier, train);
    }
    return measure;
  }

  /**
   * Computes the accuracy in classification on the given data.
   *
   * @param data the instances to be classified
   * @return classification accuracy
   * @exception Exception if the error cannot be computed successfully
   */
  protected double computeAccuracy(Classifier classifier, Instances data) throws Exception {
    double acc = 0.0;
    int numInstances = data.numInstances();
    Instance curr;
    for (int i = 0; i < numInstances; i++) {
      curr = data.instance(i);
      // check whether the instance has been correctly classified
      if (curr.classValue() == ((int) classifier.classifyInstance(curr)))
        acc++;
    }
    return (acc / numInstances);
  }

  // For debugging purposes
  void printArray(double []array) {
    for (int i = 0; i < array.length; i++)
      System.out.print(array[i] + " ");
    System.out.println();
  }

  /**
   * Build a classifier based on the selected base learner.
   *
   * @param data the training data to be used for generating the
   * Blue classifier.
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    m_Classifier.buildClassifier(data);
  }

  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if the distribution cannot be computed successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    return m_Classifier.distributionForInstance(instance);
  }

  /**
   * Returns a description of the underlying classifier.
   *
   * @return description of the classifier as a string
   */
  public String toString() {
    return m_Classifier.toString();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(1);
    newVector.addElement(new Option(
        "\tFull name of the base classifier.\n"
        + "\teg: weka.classifiers.trees.j48.J48",
        "W", 1, "-W"));

    if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler)) {
      newVector.addElement(new Option(
          "", "", 0,
          "\nOptions specific to classifier "
          + m_Classifier.getClass().getName() + ":"));
      // renamed from "enum", which is a reserved word from Java 5 onwards
      Enumeration en = ((OptionHandler) m_Classifier).listOptions();
      while (en.hasMoreElements()) {
        newVector.addElement(en.nextElement());
      }
    }
    return newVector.elements();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the options
   */
  public static void main(String [] argv) {
    try {
      System.out.println(Evaluation.evaluateModel(new Blue(), argv));
    } catch (Exception e) {
      System.err.println(e.getMessage());
    }
  }
}
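
A made-up worked example of the scoring formula in computeUtility's javadoc: suppose feature Fj has two values with predicted probabilities P(Fj=a) = 0.7 and P(Fj=b) = 0.3, the models retrained with each value fixed achieve Acc(M(Fj=a)) = 0.9 and Acc(M(Fj=b)) = 0.8, the current model's accuracy is Acc_t = 0.82, and the feature costs 2.0 to acquire. Then Acc_{t+1} = 0.7 * 0.9 + 0.3 * 0.8 = 0.87, and the query's score is (0.87 - 0.82) / 2.0 = 0.025. Queries are ranked (and optionally sampled) by this cost-normalized expected improvement.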
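
sampleWithWeights above is roulette-wheel selection without replacement: scores are shifted to be non-negative, renormalized, and the cdf is rebuilt after each draw. A self-contained sketch of the same idea over a plain score array (the class and method names here are illustrative, not part of the original file, and it assumes non-negative scores with a positive sum):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class RouletteDemo {

  // Draw num indices without replacement, with probability proportional to score.
  static int[] sample(double[] scores, int num, Random rnd) {
    List<Double> pool = new ArrayList<Double>();
    List<Integer> ids = new ArrayList<Integer>();
    for (int i = 0; i < scores.length; i++) {
      pool.add(scores[i]); // assumes non-negative; Blue shifts by the minimum first
      ids.add(i);
    }

    int[] picked = new int[num];
    for (int j = 0; j < num; j++) {
      double sum = 0;
      for (double s : pool) sum += s;

      double r = rnd.nextDouble() * sum; // a point on the roulette wheel
      double cdf = 0;
      int index = 0;
      for (; index < pool.size() - 1; index++) {
        cdf += pool.get(index);
        if (r <= cdf) break;
      }
      picked[j] = ids.remove(index); // note the index and remove the element
      pool.remove(index);
    }
    return picked;
  }

  public static void main(String[] args) {
    double[] scores = {5.0, 3.0, 1.0, 1.0};
    int[] chosen = sample(scores, 2, new Random(42));
    System.out.println(chosen[0] + ", " + chosen[1]);
  }
}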
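
Finally, a minimal sketch of driving Blue programmatically, assuming it compiles against the Weka 3.2-era API this file targets (DistributionClassifier and Classifier.makeCopies only exist in those older releases) and that its constructor, not shown in this excerpt, sets up a default base learner. The dataset path is a placeholder:

import java.io.FileReader;
import weka.core.Instances;

public class BlueDemo {
  public static void main(String[] args) throws Exception {
    // load a nominal ARFF dataset (the path is a placeholder)
    Instances data = new Instances(new FileReader("weather.nominal.arff"));
    data.setClassIndex(data.numAttributes() - 1);

    Blue blue = new Blue();     // assumes Blue is on the classpath
    blue.buildClassifier(data); // delegates to the configured base learner

    // class-membership probabilities for the first instance
    double[] dist = blue.distributionForInstance(data.instance(0));
    for (int i = 0; i < dist.length; i++)
      System.out.println(data.classAttribute().value(i) + ": " + dist[i]);
  }
}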