📄 blue.java
// [Listing begins mid-file: the class declaration, imports, and the start of
//  the query-selection method are not shown on this page.]

        }
      }
      train.setClassIndex(origClassIndex); // reset class index
    }

    double []probs = null;
    Pair []pairs = new Pair[allQueries.size()];
    for (int i = 0; i < allQueries.size(); i++) {
      Pair curr = (Pair) allQueries.get(i);
      Instance instance = train.instance((int) curr.first);
      int featureIndex = (int) curr.second;
      if (!m_UseNaiveBayes) {
        train.setClassIndex(featureIndex);
        probs = ((DistributionClassifier) featurePredictors[featureIndex])
                  .distributionForInstance(instance);
        train.setClassIndex(origClassIndex); // reset class index
      }
      // Try this out with
      // 1) uniform priors,
      // 2) probabilities estimated from the training data,
      // 3) and Laplace smoothing
      double score = computeUtility(instance, featureIndex, probs, train, currentMeasure);
      pairs[i] = new Pair(i, score); // associate the score with the query
    }

    // sort in descending order of score
    Arrays.sort(pairs, new Comparator() {
        public int compare(Object o1, Object o2) {
          double diff = ((Pair) o1).second - ((Pair) o2).second;
          return (diff < 0 ? 1 : diff > 0 ? -1 : 0);
        }
      });

    if (m_UseWeightedSampling) { // use probabilistic selection of queries
      pairs = sampleWithWeights(pairs, num);
    } // else select the top num queries

    if (m_Debug) System.out.println("Selected list:");
    for (int j = 0; j < num; j++) {
      if (m_Debug) System.out.println("\t" + pairs[j].second + "\t" + pairs[j].first);
      queries[j] = (Pair) allQueries.get((int) pairs[j].first);
    }
    return queries;
  }

  /**
   * Sample from a distribution based on assigned scores.
   *
   * @param pairs array of pairs of object and score
   * @param num number of objects to select
   **/
  protected Pair []sampleWithWeights(Pair []pairs, int num) {
    // convert the array of pairs to a vector
    int poolSize = pairs.length;
    Vector v = new Vector(poolSize);
    for (int i = 0; i < poolSize; i++)
      v.add(pairs[i]);
    return sampleWithWeights(v, num);
  }

  /**
   * Sample from a distribution based on assigned scores.
   *
   * @param v vector of pairs of object and score
   * @param num number of objects to select
   **/
  protected Pair []sampleWithWeights(Vector v, int num) {
    int poolSize = v.size();

    // Assumes the list is in descending order of scores.
    // Shift the range to account for any negative values.
    double min = ((Pair) v.get(poolSize - 1)).second;
    Pair curr;
    if (min < 0) {
      for (int i = 0; i < poolSize; i++) {
        curr = (Pair) v.get(i);
        curr.second = curr.second - min;
      }
    }

    Pair []selected = new Pair[num];
    double sum;
    /* For j = 1 to n:
     *   create a cdf,
     *   randomly pick an instance based on the cdf,
     *   note its index and remove the element.
     */
    for (int j = 0; j < num; j++) {
      sum = 0;
      for (int i = 0; i < v.size(); i++)
        sum += ((Pair) v.get(i)).second;

      // normalize
//      if (Double.isNaN(sum)) {
//        for (int i = 0; i < v.size(); i++)
//          System.err.print(((Pair) v.get(i)).second + " ");
//        System.err.println();
//        throw new IllegalArgumentException("Can't normalize array. Sum is NaN. Sum = " + sum);
//      }
//      if (sum == 0) {
      if (sum == 0 || Double.isNaN(sum)) {
        System.err.println("Sum = " + sum + ", setting to uniform weights.");
        // set probabilities for uniform selection
        double uniform = 1.0 / v.size();
        for (int i = 0; i < v.size(); i++)
          ((Pair) v.get(i)).second = uniform;
        sum = 1.0;
      } else {
        for (int i = 0; i < v.size(); i++)
          ((Pair) v.get(i)).second = ((Pair) v.get(i)).second / sum;
      }

      // create a cdf
      double []cdf = new double[v.size()];
      cdf[0] = ((Pair) v.get(0)).second;
      for (int i = 1; i < v.size(); i++)
        cdf[i] = ((Pair) v.get(i)).second + cdf[i - 1];

      double rnd = m_Random.nextDouble();
      int index = 0;
      while (index < cdf.length && rnd > cdf[index]) {
        index++;
      }
      selected[j] = (Pair) v.get(index);
      v.remove(index);
    }
    assert v.size() + num == poolSize : v.size() + " + " + num + " != " + poolSize + "\n";
    return selected;
  }

//  // randomly shuffle the given list
//  protected void shuffle(ArrayList list) {
//    System.out.println("Doing the shuffle...");
//    Random random = new Random(m_Seed);
//    Object obj;
//    int loc;
//    for (int j = list.size() - 1; j > 0; j--) {
//      // swap objects
//      obj = list.get(j);
//      loc = random.nextInt(j + 1);
//      list.set(j, list.get(loc));
//      list.set(loc, obj);
//    }
//  }

  /**
   * Compute the utility of the instance-feature pair.
   * Expected accuracy: Acc_{t+1} = Sigma_i (P(Fj = i) * Acc(M(Fj = i)))
   * Score = (Acc_{t+1} - Acc_t) / Cost_of_Fj
   * The score can be computed for measures other than accuracy, e.g. entropy.
   *
   * @param instance instance under consideration
   * @param featureIndex feature under consideration
   * @param probs predicted probability of each feature-value for the instance
   * @param train training set over which utility is measured
   * @param currentMeasure the accuracy/entropy of the current model
   */
  protected double computeUtility(Instance instance, int featureIndex,
                                  double []probs, Instances train,
                                  double currentMeasure) throws Exception {
    // For each feature-value with a non-zero probability, generate a classifier,
    // measure its accuracy, and compute the score as the expected accuracy.
    double sum = 0.0;
    int numValues = train.attribute(featureIndex).numValues();
    Classifier classifier;
    Evaluation eval;
    double utility;
    // Assumes that probs is actually a distribution, i.e. adds up to 1.0
    for (int i = 0; i < numValues; i++) {
      if (probs == null || probs[i] != 0) {
        Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1);
        classifier = tmp[0];
        instance.setValue(featureIndex, i);
        classifier.buildClassifier(train); // train classifier assuming the current value for the feature
        instance.setMissing(featureIndex); // reset feature to be missing
        // DEBUG should handle Evaluation(train, costMatrix)
        if (m_Policy == EXPECTED_UTILITY_ENTROPY || m_Policy == HBL_ENTROPY) {
          // if (m_Debug) System.out.println("Using entropy...");
          // compute the expected entropy
          eval = new Evaluation(train);
          eval.evaluateModel(classifier, train);
          utility = -1 * eval.SFMeanSchemeEntropy();
        } else {
          // compute expected accuracy
          utility = computeAccuracy(classifier, train);
          // accuracy = eval.pctCorrect();
        }
        if (m_UseNaiveBayes) {
          sum += m_Distributions[featureIndex][(int) instance.classValue()].getProbability(i) * utility;
        } else {
          sum += probs[i] * utility;
        }
      }
    }
    return ((sum - currentMeasure) / m_FeatureCosts[featureIndex]);
  }

  // Compute the current model's accuracy/entropy on the training set
  protected double computeCurrentMeasure(Instances train) throws Exception {
    Evaluation eval;
    double measure = 0.0;
    if (m_Policy == EXPECTED_UTILITY_ENTROPY || m_Policy == HBL_ENTROPY) {
      // if (m_Debug) System.out.println("Using entropy...");
      // compute the current (negative) entropy
      eval = new Evaluation(train);
      eval.evaluateModel(m_Classifier, train);
      measure = -1 * eval.SFMeanSchemeEntropy();
    } else {
      // compute expected accuracy
      measure = computeAccuracy(m_Classifier, train);
    }
    return measure;
  }

  /**
   * Computes the accuracy in classification on the given data.
   *
   * @param data the instances to be classified
   * @return classification accuracy
   * @exception Exception if the error cannot be computed successfully
   */
  protected double computeAccuracy(Classifier classifier, Instances data) throws Exception {
    double acc = 0.0;
    int numInstances = data.numInstances();
    Instance curr;
    for (int i = 0; i < numInstances; i++) {
      curr = data.instance(i);
      // check whether the instance has been correctly classified
      if (curr.classValue() == ((int) classifier.classifyInstance(curr)))
        acc++;
    }
    return (acc / numInstances);
  }

  // For debugging purposes
  void printArray(double []array) {
    for (int i = 0; i < array.length; i++)
      System.out.print(array[i] + " ");
    System.out.println();
  }

  /**
   * Build a classifier based on the selected base learner.
   *
   * @param data the training data to be used for generating the
   * Blue classifier.
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    m_Classifier.buildClassifier(data);
  }

  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if the distribution cannot be computed successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    return m_Classifier.distributionForInstance(instance);
  }

  /**
   * Returns a description of the underlying classifier.
   *
   * @return description of the classifier as a string
   */
  public String toString() {
    return m_Classifier.toString();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(1);
    newVector.addElement(new Option(
        "\tFull name of the base classifier.\n"
        + "\teg: weka.classifiers.trees.j48.J48",
        "W", 1, "-W"));

    if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler)) {
      newVector.addElement(new Option(
          "", "", 0,
          "\nOptions specific to classifier "
          + m_Classifier.getClass().getName() + ":"));
      // renamed from "enum", which is a reserved word from Java 5 onwards
      Enumeration en = ((OptionHandler) m_Classifier).listOptions();
      while (en.hasMoreElements()) {
        newVector.addElement(en.nextElement());
      }
    }
    return newVector.elements();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the options
   */
  public static void main(String [] argv) {
    try {
      System.out.println(Evaluation.evaluateModel(new Blue(), argv));
    } catch (Exception e) {
      System.err.println(e.getMessage());
    }
  }
}
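
A made-up worked example of the scoring formula in computeUtility's javadoc: suppose feature Fj has two values with predicted probabilities P(Fj=a) = 0.7 and P(Fj=b) = 0.3, the models retrained with each value fixed achieve Acc(M(Fj=a)) = 0.9 and Acc(M(Fj=b)) = 0.8, the current model's accuracy is Acc_t = 0.82, and the feature costs 2.0 to acquire. Then Acc_{t+1} = 0.7 * 0.9 + 0.3 * 0.8 = 0.87, and the query's score is (0.87 - 0.82) / 2.0 = 0.025. Queries are ranked (and optionally sampled) by this cost-normalized expected improvement.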
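
sampleWithWeights above is roulette-wheel selection without replacement: scores are shifted to be non-negative, renormalized, and the cdf is rebuilt after each draw. A self-contained sketch of the same idea over a plain score array (the class and method names here are illustrative, not part of the original file, and it assumes non-negative scores with a positive sum):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class RouletteDemo {

  // Draw num indices without replacement, with probability proportional to score.
  static int[] sample(double[] scores, int num, Random rnd) {
    List<Double> pool = new ArrayList<Double>();
    List<Integer> ids = new ArrayList<Integer>();
    for (int i = 0; i < scores.length; i++) {
      pool.add(scores[i]); // assumes non-negative; Blue shifts by the minimum first
      ids.add(i);
    }

    int[] picked = new int[num];
    for (int j = 0; j < num; j++) {
      double sum = 0;
      for (double s : pool) sum += s;

      double r = rnd.nextDouble() * sum; // a point on the roulette wheel
      double cdf = 0;
      int index = 0;
      for (; index < pool.size() - 1; index++) {
        cdf += pool.get(index);
        if (r <= cdf) break;
      }
      picked[j] = ids.remove(index); // note the index and remove the element
      pool.remove(index);
    }
    return picked;
  }

  public static void main(String[] args) {
    double[] scores = {5.0, 3.0, 1.0, 1.0};
    int[] chosen = sample(scores, 2, new Random(42));
    System.out.println(chosen[0] + ", " + chosen[1]);
  }
}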
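
Finally, a minimal sketch of driving Blue programmatically, assuming it compiles against the Weka 3.2-era API this file targets (DistributionClassifier and Classifier.makeCopies only exist in those older releases) and that its constructor, not shown in this excerpt, sets up a default base learner. The dataset path is a placeholder:

import java.io.FileReader;
import weka.core.Instances;

public class BlueDemo {
  public static void main(String[] args) throws Exception {
    // load a nominal ARFF dataset (the path is a placeholder)
    Instances data = new Instances(new FileReader("weather.nominal.arff"));
    data.setClassIndex(data.numAttributes() - 1);

    Blue blue = new Blue();     // assumes Blue is on the classpath
    blue.buildClassifier(data); // delegates to the configured base learner

    // class-membership probabilities for the first instance
    double[] dist = blue.distributionForInstance(data.instance(0));
    for (int i = 0; i < dist.length; i++)
      System.out.println(data.classAttribute().value(i) + ": " + dist[i]);
  }
}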