
📄 blue.java

📁 wekaUT is a collection of semi-supervised learning classifiers built on Weka, developed at the University of Texas at Austin.
💻 JAVA
📖 Page 1 of 3
    /**
     * Given a set of incomplete instances, select a specified number of instance-feature queries.
     * @param train set of incomplete instances
     * @param num number of instance-feature pairs to select for acquiring remaining features
     * @param queryMatrix matrix to track available queries
     * @exception Exception if selection fails
     */
    public Pair []selectInstancesForFeatures(Instances train, int num, boolean [][]queryMatrix) throws Exception{
        Pair []queries = null;
        switch(m_Policy){
        case ROUND_ROBIN:
            System.out.println("<<Round Robin>>");
            queries = roundRobin(train, num, queryMatrix);
            break;
        case EXPECTED_UTILITY:
            System.out.println("<<Expected Utility>>");
            queries = expectedUtility(train, num, queryMatrix);
            break;
        case EXPECTED_UTILITY_ENTROPY:
            System.out.println("<<Expected Utility using Entropy>>");
            queries = expectedUtility(train, num, queryMatrix);
            break;
        case DEFAULT_RR:
            System.out.println("<<EU + RR>>");
            queries = expectedUtility(train, num, queryMatrix);
            break;
        case ERROR_SAMPLING:
            System.out.println("<<Error Sampling>>");
            queries = errorSampling(train, num, queryMatrix);
            break;
        case UNCERTAINTY_SAMPLING:
            System.out.println("<<Uncertainty Sampling>>");
            queries = errorSampling(train, num, queryMatrix);
            break;
        case ERROR_SAMPLING_RR:
            System.out.println("<<Error Sampling + Round Robin>>");
            queries = errorSampling(train, num, queryMatrix);
            break;
        case HBL:
            System.out.println("<<HBL>>");
            queries = hbl(train, num, queryMatrix);
            break;
        case HBL_RR:
            System.out.println("<<HBL + Round Robin>>");
            queries = hbl(train, num, queryMatrix);
            break;
        case HBL_ENTROPY:
            System.out.println("<<HBL + Entropy>>");
            queries = hbl(train, num, queryMatrix);
            break;
        case RANDOM:
            System.out.println("<<Random Sampling>>");
            queries = randomSampling(train, num, queryMatrix);
            break;
        case CHEAPEST:
            System.out.println("<<Cheapest>>");
            queries = cheapest(train, num, queryMatrix);
            break;
        default:
            System.err.println("BLUE: Unrecognized selection policy.");
        }
        return queries;
    }

    /**
     * Hierarchical Budgeted Learning
     */
    protected Pair []hbl(Instances train, int num, boolean [][]queryMatrix)throws Exception{
        int subsetSize;//size of the subset of queries selected by errorSampling
        if(m_Alpha < 1.0) subsetSize = num;
        else subsetSize = (int) (num * m_Alpha);

        ArrayList subList;
        if(subsetSize >= numQueriesAvailable(queryMatrix))
            subList = generateAllQueries(queryMatrix);//include all queries
        else {
            Pair []subset=null;
            switch(m_HBLPolicy){
            case HBL_ERROR_SAMPLING:
                subset = errorSampling(train, subsetSize, queryMatrix);
                break;
            case HBL_UNCERTAINTY_SAMPLING:
                subset = errorSampling(train, subsetSize, queryMatrix);
                break;
            case HBL_RANDOM:
                subset = randomSampling(train, subsetSize, queryMatrix);
                break;
            default:
                System.err.println("BLUE: Unrecognized HBL policy.");
            }

            subList = new ArrayList();
            for(int i=0; i<subset.length; i++)
                subList.add(subset[i]);
        }
        boolean []featuresAvailable = findAvailableFeatures(subList, train.numAttributes()-1);
        return selectFromAvailable(train, num, subList, featuresAvailable);
    }

    //Determine which features (columns) have missing values
    protected boolean []findAvailableFeatures(ArrayList allQueries, int numFeatures){
        boolean []featuresAvailable = new boolean[numFeatures];
        Pair curr;
        for(int i=0; i<allQueries.size(); i++){
            curr = (Pair) allQueries.get(i);
            featuresAvailable[(int)curr.second] = true;
        }
        return featuresAvailable;
    }

    //Count the number of queries available
    protected int numQueriesAvailable(boolean [][]queryMatrix){
        int ctr = 0;
        for(int i=0; i<queryMatrix.length; i++)
            for(int j=0; j<(queryMatrix[0].length); j++)
                if(!queryMatrix[i][j]) ctr++;
        return ctr;
    }

    //Generate the list of all query pairs
    protected ArrayList generateAllQueries(boolean [][]queryMatrix){
        ArrayList allQueries = new ArrayList();
        for(int i=0; i<queryMatrix.length; i++)
            for(int j=0; j<(queryMatrix[0].length); j++)
                if(!queryMatrix[i][j]) allQueries.add(new Pair(i,j));
        return allQueries;
    }

    //Select instances using error sampling, then select features for these instances
    protected Pair []errorSampling(Instances train, int num, boolean [][]queryMatrix)throws Exception{
        //Create list of incomplete instances in the training set
        //Score each incomplete instance based on the error sampling score
        //Associate the same score for each query available for the instance
        //Sort queries based on the score

        /* Quite often instances will have the same score, in which
         * case we would like to treat all features from these
         * instances as equally valuable for selection.  */

        if(m_Policy==UNCERTAINTY_SAMPLING ||
           (m_Policy==HBL && m_HBLPolicy==HBL_UNCERTAINTY_SAMPLING))
            System.out.println("UNCERTAINTY SAMPLING...");
        else
            System.out.println("ERROR SAMPLING...");

        //Make a list of pairs of indices of instances in the query matrix and the corresponding score
        int numInstances = train.numInstances();
        int numFeatures = train.numAttributes()-1;
        //create a list of query pairs
        ArrayList allQueries = new ArrayList();
        ArrayList pairList = new ArrayList(); //list of query-score pairs
        double score;
        int numQueries = 0;
        for(int i=0; i<numInstances; i++){
            int ctr=0;
            for(int j=0; j<numFeatures; j++)
                if(!queryMatrix[i][j]){
                    allQueries.add(new Pair(i,j));
                    ctr++;//counts features available for current instance
                }
            if(ctr>0){//the instance is incomplete
                //perform error sampling by default
                if(m_Policy==UNCERTAINTY_SAMPLING ||
                   (m_Policy==HBL && m_HBLPolicy==HBL_UNCERTAINTY_SAMPLING))
                    score = -1*calculateMargin(train.instance(i));
                else
                    score = -1*calculateRandomHybridScore(train.instance(i));

                //associate score with all available feature queries for this instance
                //the scores are negated only for consistency of ordering
                Pair curr;
                for(int k=numQueries;k<numQueries+ctr;k++){
                    curr = new Pair(k, score);
                    pairList.add(curr);
                }
            }
            numQueries += ctr;
        }

        assert (numQueries==allQueries.size()) : "Checksum error";

        if(m_Policy != ERROR_SAMPLING_RR && m_Policy != HBL_RR)
            Collections.shuffle(pairList, m_Random);//shuffle so that ties are broken randomly
        //else select all features from one incomplete instance before
        //proceeding to the next
        //sort in DEScending order
        Collections.sort(pairList, new Comparator() {
                public int compare(Object o1, Object o2) {
                    double diff = ((Pair)o1).second - ((Pair)o2).second;
                    return(diff < 0 ? 1 : diff > 0 ? -1 : 0);
                }
            });

        Pair []queries = new Pair[num];
        if(m_Debug) System.out.println("Sorted list:");
        for(int j=0; j<num; j++){
            if(m_Debug) System.out.println("\t"+((Pair) pairList.get(j)).second+"\t"+((Pair) pairList.get(j)).first);
            queries[j] = (Pair) allQueries.get((int) ((Pair) pairList.get(j)).first);
        }
        return queries;
    }

    //Select features using a round robin policy
    protected Pair []roundRobin(Instances train, int num, boolean [][]queryMatrix){
        int numInstances = train.numInstances();
        int numFeatures = train.numAttributes()-1;
        //create a list of query pairs
        Pair []queries = new Pair[num];
        int c=0;
        for(int i=0; i<numInstances && c<num; i++)
            for(int j=0; j<numFeatures && c<num; j++)
                if(!queryMatrix[i][j])
                    queries[c++] = new Pair(i,j);
        return queries;
    }

    //Randomly select num queries
    protected Pair []randomSampling(Instances train, int num, boolean [][]queryMatrix) throws Exception{
        int numInstances = train.numInstances();
        int numFeatures = train.numAttributes()-1;
        //create a list of query pairs
        ArrayList allQueries = new ArrayList();
        for(int i=0; i<numInstances; i++)
            for(int j=0; j<numFeatures; j++)
                if(!queryMatrix[i][j]) allQueries.add(new Pair(i,j));

        Collections.shuffle(allQueries, m_Random);
        Pair []queries = new Pair[num];
        for(int i=0; i<num; i++)
            queries[i] = (Pair) allQueries.get(i);
        return queries;
    }

    //Acquire features in order of increasing cost
    protected Pair []cheapest(Instances train, int num, boolean [][]queryMatrix) throws Exception{
        int numInstances = train.numInstances();
        int numFeatures = train.numAttributes()-1;

        //associate feature indices with costs
        Pair []indexCosts = new Pair[numFeatures];
        for(int i=0;i<numFeatures;i++)
            indexCosts[i] = new Pair(i,m_FeatureCosts[i]);

        //sort in AScending order of costs
        Arrays.sort(indexCosts, new Comparator() {
                public int compare(Object o1, Object o2) {
                    double diff = ((Pair)o2).second - ((Pair)o1).second;
                    return(diff < 0 ? 1 : diff > 0 ? -1 : 0);
                }
            });

        //create a list of query pairs
        Pair []queries = new Pair[num];
        int c=0;
        for(int j=0; j<numFeatures && c<num; j++){
            int featureIndex = (int) indexCosts[j].first;
            for(int i=0; i<numInstances && c<num; i++)
                if(!queryMatrix[i][featureIndex])
                    queries[c++] = new Pair(i,featureIndex);
        }

        return queries;
    }

    //Select features based on the maximum expected utility of acquiring the feature-value
    protected Pair[]expectedUtility(Instances train, int num, boolean [][]queryMatrix) throws Exception{
        int numInstances = train.numInstances();
        int numFeatures = train.numAttributes()-1;
        //create a list of query pairs
        ArrayList allQueries = new ArrayList();
        boolean []featureAvailable = new boolean[numFeatures];
        for(int i=0; i<numInstances; i++)
            for(int j=0; j<numFeatures; j++)
                if(!queryMatrix[i][j]){
                    allQueries.add(new Pair(i,j));
                    featureAvailable[j] = true;
                    //keep track which features (columns) are still available
                }

        //Shuffle all the queries unless the default is Round Robin
        if(m_Policy!=DEFAULT_RR && m_Policy!=HBL_RR) Collections.shuffle(allQueries, m_Random);
        return selectFromAvailable(train, num, allQueries, featureAvailable);
    }

    protected Pair[]selectFromAvailable(Instances train, int num, ArrayList allQueries, boolean []featureAvailable)throws Exception{
        int numFeatures = train.numAttributes()-1;
        Pair []queries = new Pair[num];
        //Generate a classifier for each available feature
        //For each instance-feature pair compute a score
        //Sort queries by score
        //Return top num queries

        /*************************
         * We are assuming all features are nominal. But this can be
         * changed by using a discretizer for numeric features and
         * then treating them as nominal. This can be done by passing
         * the training set through a filter.
         *************************/

        double currentMeasure = computeCurrentMeasure(train);//accuracy/entropy on training set

        int origClassIndex=-1;
        Classifier []featurePredictors=null;

        if(m_UseNaiveBayes){
            NaiveBayes nb = new NaiveBayes();
            nb.buildClassifier(train);
            m_Distributions = nb.getDistributions();
        }else{
            origClassIndex = train.classIndex();//backup class index
            featurePredictors = new Classifier [numFeatures];
            for(int i=0; i<numFeatures; i++){
                if(featureAvailable[i]){
                    Classifier tmp[] = Classifier.makeCopies(m_Classifier,1);
                    featurePredictors[i] = tmp[0];
                    train.setClassIndex(i);//set the feature (column) as the target variable
                    featurePredictors[i].buildClassifier(train);
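The listing above cuts off inside selectFromAvailable because only page 1 of 3 of blue.java is shown, but the public entry point selectInstancesForFeatures is complete. Below is a minimal usage sketch for it. Only selectInstancesForFeatures, the policy constants named in its switch statement, and the Pair fields (first = instance index, second = feature index) are taken from the listing; the class name BLUE, its no-argument constructor, and the setPolicy(...) setter for m_Policy are assumptions made purely for illustration.

import java.io.FileReader;

import weka.core.Instances;

public class BlueDemo {
    public static void main(String[] args) throws Exception {
        // Load a training set that still has missing feature values (standard Weka API).
        Instances train = new Instances(new FileReader("train.arff"));
        train.setClassIndex(train.numAttributes() - 1);

        // queryMatrix[i][j] == false means feature j of instance i may still be queried,
        // matching the !queryMatrix[i][j] checks in the listing: known values are marked
        // unavailable, missing values remain open queries.
        int numFeatures = train.numAttributes() - 1;
        boolean[][] queryMatrix = new boolean[train.numInstances()][numFeatures];
        for (int i = 0; i < train.numInstances(); i++)
            for (int j = 0; j < numFeatures; j++)
                queryMatrix[i][j] = !train.instance(i).isMissing(j);

        BLUE selector = new BLUE();              // assumed class name for blue.java
        selector.setPolicy(BLUE.ERROR_SAMPLING); // assumed setter for m_Policy

        // Ask the selector for 10 instance-feature pairs to acquire next.
        Pair[] queries = selector.selectInstancesForFeatures(train, 10, queryMatrix);
        for (Pair q : queries)
            System.out.println("acquire feature " + (int) q.second
                               + " of instance " + (int) q.first);
    }
}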
