📄 blue.java
/**
 * Given a set of incomplete instances, select a specified number of
 * instance-feature queries.
 *
 * @param train set of incomplete instances
 * @param num number of instance-feature pairs to select for acquiring remaining features
 * @param queryMatrix matrix to track available queries
 * @exception Exception if selection fails
 */
public Pair[] selectInstancesForFeatures(Instances train, int num, boolean[][] queryMatrix) throws Exception {
    Pair[] queries = null;
    switch (m_Policy) {
        case ROUND_ROBIN:
            System.out.println("<<Round Robin>>");
            queries = roundRobin(train, num, queryMatrix);
            break;
        case EXPECTED_UTILITY:
            System.out.println("<<Expected Utility>>");
            queries = expectedUtility(train, num, queryMatrix);
            break;
        case EXPECTED_UTILITY_ENTROPY:
            System.out.println("<<Expected Utility using Entropy>>");
            queries = expectedUtility(train, num, queryMatrix);
            break;
        case DEFAULT_RR:
            System.out.println("<<EU + RR>>");
            queries = expectedUtility(train, num, queryMatrix);
            break;
        case ERROR_SAMPLING:
            System.out.println("<<Error Sampling>>");
            queries = errorSampling(train, num, queryMatrix);
            break;
        case UNCERTAINTY_SAMPLING:
            System.out.println("<<Uncertainty Sampling>>");
            queries = errorSampling(train, num, queryMatrix);
            break;
        case ERROR_SAMPLING_RR:
            System.out.println("<<Error Sampling + Round Robin>>");
            queries = errorSampling(train, num, queryMatrix);
            break;
        case HBL:
            System.out.println("<<HBL>>");
            queries = hbl(train, num, queryMatrix);
            break;
        case HBL_RR:
            System.out.println("<<HBL + Round Robin>>");
            queries = hbl(train, num, queryMatrix);
            break;
        case HBL_ENTROPY:
            System.out.println("<<HBL + Entropy>>");
            queries = hbl(train, num, queryMatrix);
            break;
        case RANDOM:
            System.out.println("<<Random Sampling>>");
            queries = randomSampling(train, num, queryMatrix);
            break;
        case CHEAPEST:
            System.out.println("<<Cheapest>>");
            queries = cheapest(train, num, queryMatrix);
            break;
        default:
            System.err.println("BLUE: Unrecognized selection policy.");
    }
    return queries;
}

/**
 * Hierarchical Budgeted Learning: first narrow the pool of candidate queries
 * using the configured HBL sub-policy, then pick the final num queries from
 * that pool via selectFromAvailable.
 */
protected Pair[] hbl(Instances train, int num, boolean[][] queryMatrix) throws Exception {
    int subsetSize; // size of the subset of queries selected by errorSampling
    if (m_Alpha < 1.0)
        subsetSize = num;
    else
        subsetSize = (int) (num * m_Alpha);
    ArrayList subList;
    if (subsetSize >= numQueriesAvailable(queryMatrix))
        subList = generateAllQueries(queryMatrix); // include all queries
    else {
        Pair[] subset = null;
        switch (m_HBLPolicy) {
            case HBL_ERROR_SAMPLING:
                subset = errorSampling(train, subsetSize, queryMatrix);
                break;
            case HBL_UNCERTAINTY_SAMPLING:
                subset = errorSampling(train, subsetSize, queryMatrix);
                break;
            case HBL_RANDOM:
                subset = randomSampling(train, subsetSize, queryMatrix);
                break;
            default:
                System.err.println("BLUE: Unrecognized HBL policy.");
        }
        subList = new ArrayList();
        for (int i = 0; i < subset.length; i++)
            subList.add(subset[i]);
    }
    boolean[] featuresAvailable = findAvailableFeatures(subList, train.numAttributes() - 1);
    return selectFromAvailable(train, num, subList, featuresAvailable);
}

// Determine which features (columns) have missing values
protected boolean[] findAvailableFeatures(ArrayList allQueries, int numFeatures) {
    boolean[] featuresAvailable = new boolean[numFeatures];
    Pair curr;
    for (int i = 0; i < allQueries.size(); i++) {
        curr = (Pair) allQueries.get(i);
        featuresAvailable[(int) curr.second] = true;
    }
    return featuresAvailable;
}

// Count the number of queries available
protected int numQueriesAvailable(boolean[][] queryMatrix) {
    int ctr = 0;
    for (int i = 0; i < queryMatrix.length; i++)
        for (int j = 0; j < queryMatrix[0].length; j++)
            if (!queryMatrix[i][j])
                ctr++;
    return ctr;
}

// Generate the list of all query pairs
protected ArrayList generateAllQueries(boolean[][] queryMatrix) {
    ArrayList allQueries = new ArrayList();
    for (int i = 0; i < queryMatrix.length; i++)
        for (int j = 0; j < queryMatrix[0].length; j++)
            if (!queryMatrix[i][j])
                allQueries.add(new Pair(i, j));
    return allQueries;
}

// Select instances using error sampling, then select features for these instances
protected Pair[] errorSampling(Instances train, int num, boolean[][] queryMatrix) throws Exception {
    // Create a list of incomplete instances in the training set.
    // Score each incomplete instance based on the error sampling score.
    // Associate the same score with each query available for the instance.
    // Sort queries based on the score.
    /* Quite often instances will have the same score, in which case we would
     * like to treat all features from these instances as equally valuable
     * for selection. */
    if (m_Policy == UNCERTAINTY_SAMPLING || (m_Policy == HBL && m_HBLPolicy == HBL_UNCERTAINTY_SAMPLING))
        System.out.println("UNCERTAINTY SAMPLING...");
    else
        System.out.println("ERROR SAMPLING...");

    // Make a list of pairs of indices of instances in the query matrix and the corresponding score
    int numInstances = train.numInstances();
    int numFeatures = train.numAttributes() - 1;
    // create a list of query pairs
    ArrayList allQueries = new ArrayList();
    ArrayList pairList = new ArrayList(); // list of query-score pairs
    double score;
    int numQueries = 0;
    for (int i = 0; i < numInstances; i++) {
        int ctr = 0;
        for (int j = 0; j < numFeatures; j++)
            if (!queryMatrix[i][j]) {
                allQueries.add(new Pair(i, j));
                ctr++; // counts features available for the current instance
            }
        if (ctr > 0) { // the instance is incomplete
            // perform error sampling by default
            if (m_Policy == UNCERTAINTY_SAMPLING || (m_Policy == HBL && m_HBLPolicy == HBL_UNCERTAINTY_SAMPLING))
                score = -1 * calculateMargin(train.instance(i));
            else
                score = -1 * calculateRandomHybridScore(train.instance(i));
            // associate the score with all available feature queries for this instance;
            // the scores are negated only for consistency of ordering
            Pair curr;
            for (int k = numQueries; k < numQueries + ctr; k++) {
                curr = new Pair(k, score);
                pairList.add(curr);
            }
        }
        numQueries += ctr;
    }
    assert (numQueries == allQueries.size()) : "Checksum error";

    if (m_Policy != ERROR_SAMPLING_RR && m_Policy != HBL_RR)
        Collections.shuffle(pairList, m_Random); // shuffle so that ties are broken randomly
    // else select all features from one incomplete instance before
    // proceeding to the next

    // sort in descending order of score
    Collections.sort(pairList, new Comparator() {
        public int compare(Object o1, Object o2) {
            double diff = ((Pair) o1).second - ((Pair) o2).second;
            return (diff < 0 ? 1 : diff > 0 ? -1 : 0);
        }
    });

    Pair[] queries = new Pair[num];
    if (m_Debug)
        System.out.println("Sorted list:");
    for (int j = 0; j < num; j++) {
        if (m_Debug)
            System.out.println("\t" + ((Pair) pairList.get(j)).second + "\t" + ((Pair) pairList.get(j)).first);
        queries[j] = (Pair) allQueries.get((int) ((Pair) pairList.get(j)).first);
    }
    return queries;
}

// Select features using a round robin policy
protected Pair[] roundRobin(Instances train, int num, boolean[][] queryMatrix) {
    int numInstances = train.numInstances();
    int numFeatures = train.numAttributes() - 1;
    // create a list of query pairs
    Pair[] queries = new Pair[num];
    int c = 0;
    for (int i = 0; i < numInstances && c < num; i++)
        for (int j = 0; j < numFeatures && c < num; j++)
            if (!queryMatrix[i][j])
                queries[c++] = new Pair(i, j);
    return queries;
}

// Randomly select num queries
protected Pair[] randomSampling(Instances train, int num, boolean[][] queryMatrix) throws Exception {
    int numInstances = train.numInstances();
    int numFeatures = train.numAttributes() - 1;
    // create a list of query pairs
    ArrayList allQueries = new ArrayList();
    for (int i = 0; i < numInstances; i++)
        for (int j = 0; j < numFeatures; j++)
            if (!queryMatrix[i][j])
                allQueries.add(new Pair(i, j));
    Collections.shuffle(allQueries, m_Random);
    Pair[] queries = new Pair[num];
    for (int i = 0; i < num; i++)
        queries[i] = (Pair) allQueries.get(i);
    return queries;
}

// Acquire features in order of increasing cost
protected Pair[] cheapest(Instances train, int num, boolean[][] queryMatrix) throws Exception {
    int numInstances = train.numInstances();
    int numFeatures = train.numAttributes() - 1;
    // associate feature indices with costs
    Pair[] indexCosts = new Pair[numFeatures];
    for (int i = 0; i < numFeatures; i++)
        indexCosts[i] = new Pair(i, m_FeatureCosts[i]);
    // sort in ascending order of cost
    Arrays.sort(indexCosts, new Comparator() {
        public int compare(Object o1, Object o2) {
            double diff = ((Pair) o2).second - ((Pair) o1).second;
            return (diff < 0 ? 1 : diff > 0 ? -1 : 0);
        }
    });
    // create a list of query pairs
    Pair[] queries = new Pair[num];
    int c = 0;
    for (int j = 0; j < numFeatures && c < num; j++) {
        int featureIndex = (int) indexCosts[j].first;
        for (int i = 0; i < numInstances && c < num; i++)
            if (!queryMatrix[i][featureIndex])
                queries[c++] = new Pair(i, featureIndex);
    }
    return queries;
}

// Select features based on the maximum expected utility of acquiring the feature value
protected Pair[] expectedUtility(Instances train, int num, boolean[][] queryMatrix) throws Exception {
    int numInstances = train.numInstances();
    int numFeatures = train.numAttributes() - 1;
    // create a list of query pairs
    ArrayList allQueries = new ArrayList();
    boolean[] featureAvailable = new boolean[numFeatures];
    for (int i = 0; i < numInstances; i++)
        for (int j = 0; j < numFeatures; j++)
            if (!queryMatrix[i][j]) {
                allQueries.add(new Pair(i, j));
                featureAvailable[j] = true; // keep track of which features (columns) are still available
            }
    // Shuffle all the queries unless the default is Round Robin
    if (m_Policy != DEFAULT_RR && m_Policy != HBL_RR)
        Collections.shuffle(allQueries, m_Random);
    return selectFromAvailable(train, num, allQueries, featureAvailable);
}

protected Pair[] selectFromAvailable(Instances train, int num, ArrayList allQueries, boolean[] featureAvailable) throws Exception {
    int numFeatures = train.numAttributes() - 1;
    Pair[] queries = new Pair[num];
    // Generate a classifier for each available feature.
    // For each instance-feature pair compute a score.
    // Sort queries by score.
    // Return the top num queries.
    /*************************
     * We are assuming all features are nominal. But this can be changed by
     * using a discretizer for numeric features and then treating them as
     * nominal. This can be done by passing the training set through a filter.
     *************************/
    double currentMeasure = computeCurrentMeasure(train); // accuracy/entropy on the training set
    int origClassIndex = -1;
    Classifier[] featurePredictors = null;
    if (m_UseNaiveBayes) {
        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(train);
        m_Distributions = nb.getDistributions();
    } else {
        origClassIndex = train.classIndex(); // back up the class index
        featurePredictors = new Classifier[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            if (featureAvailable[i]) {
                Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1);
                featurePredictors[i] = tmp[0];
                train.setClassIndex(i); // set the feature (column) as the target variable
                featurePredictors[i].buildClassifier(train);
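The listing above is an excerpt: it cuts off inside selectFromAvailable, and the enclosing class, the Pair helper, and fields such as m_Policy, m_Alpha, m_Random, m_Classifier, m_FeatureCosts, and m_UseNaiveBayes are defined elsewhere in the original file. The sketch below is only an illustration of how the public entry point might be driven, not part of the source; it assumes the enclosing class is named BLUE (the name used in the error messages), has a no-argument constructor, and lives in the same package as the caller, and that Pair exposes public numeric first/second fields, as the casts in the code above suggest.

// --- Hypothetical usage sketch; BLUE's constructor, package, and default
// --- selection policy are assumptions, not part of the excerpt above.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

class BlueDriver {
    public static void main(String[] args) throws Exception {
        // Load a WEKA data set and treat the last attribute as the class.
        Instances train = DataSource.read("train.arff");
        train.setClassIndex(train.numAttributes() - 1);

        // queryMatrix[i][j] == true means feature j of instance i has already
        // been acquired; false means that instance-feature query is still open.
        boolean[][] queryMatrix =
                new boolean[train.numInstances()][train.numAttributes() - 1];

        BLUE learner = new BLUE(); // assumed class holding the methods above;
                                   // its selection policy (m_Policy) must be set
                                   // through whatever options the full class provides
        Pair[] queries = learner.selectInstancesForFeatures(train, 10, queryMatrix);

        // Each returned Pair holds (instance index, feature index) of a value to acquire.
        if (queries != null)
            for (int k = 0; k < queries.length; k++)
                System.out.println("acquire instance " + (int) queries[k].first
                        + ", feature " + (int) queries[k].second);
    }
}

Note that throughout the code a false entry in queryMatrix marks a query that is still available, which is why every selector builds its candidate list from !queryMatrix[i][j].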