📄 activedecorate.java
字号:
//Compute cumulative probabilities cdf[0] = invProbs[0]; for(int i=1; i<invProbs.length; i++){ cdf[i] = invProbs[i]+cdf[i-1]; } if(Double.isNaN(cdf[invProbs.length-1])) System.err.println("Cumulative class membership probability is NaN!"); return selectIndexProbabilistically(cdf); } /** * Given cumulative probabilities select a nominal attribute value index * * @param cdf array of cumulative probabilities * @return index of attribute selected based on the probability distribution */ protected int selectIndexProbabilistically(double []cdf){ double rnd = m_Random.nextDouble(); int index = 0; while(index < cdf.length && rnd > cdf[index]){ index++; } return index; } /** * Removes a specified number of instances from the given set of instances. * * @param data given instances * @param numRemove number of instances to delete from the given instances */ protected void removeInstances(Instances data, int numRemove){ int num = data.numInstances(); for(int i=num - 1; i>num - 1 - numRemove;i--){ data.delete(i); } } /** * Add new instances to the given set of instances. * * @param data given instances * @param newData set of instances to add to given instances */ protected void addInstances(Instances data, Instances newData){ for(int i=0; i<newData.numInstances(); i++) data.add(newData.instance(i)); } /** * Computes the error in classification on the given data. * * @param data the instances to be classified * @return classification error * @exception Exception if error can not be computed successfully */ protected double computeError(Instances data) throws Exception { double error = 0.0; int numInstances = data.numInstances(); Instance curr; for(int i=0; i<numInstances; i++){ curr = data.instance(i); //Check if the instance has been misclassified if(curr.classValue() != ((int) classifyInstance(curr))) error++; } return (error/numInstances); } /** * Calculates the class membership probabilities for the given test instance. 
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  if (instance.classAttribute().isNumeric()) {
    throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
  }
  // Sum votes over all committee members: distribution classifiers
  // contribute their full posterior; others contribute one vote for
  // their predicted class.
  double [] sums = new double [instance.numClasses()], newProbs;
  Classifier curr;
  for (int i = 0; i < m_Committee.size(); i++) {
    curr = (Classifier) m_Committee.get(i);
    if (curr instanceof DistributionClassifier) {
      newProbs = ((DistributionClassifier)curr).distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    } else {
      sums[(int)curr.classifyInstance(instance)]++;
    }
  }
  // Guard against normalizing an all-zero vector.
  if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}

/**
 * Given a set of unlabeled examples, select a specified number of examples
 * to be labeled.
 *
 * @param unlabeledActivePool pool of unlabeled examples
 * @param num number of examples to be selected for labeling
 * @return indices (into the pool) of the selected examples
 * @exception Exception if selective sampling fails
 */
public int [] selectInstances(Instances unlabeledActivePool,int num) throws Exception{
  // Make a list of pairs of indices and the corresponding measure of
  // informativeness of examples; sort in order of informativeness and
  // return the list of the num most informative indices.
  int poolSize = unlabeledActivePool.numInstances();
  Pair []pairs = new Pair[poolSize];
  for(int i=0; i<poolSize; i++){
    pairs[i] = new Pair(i,calculateDisagreement(unlabeledActivePool.instance(i)));
  }
  // Sort in descending order of disagreement.
  Arrays.sort(pairs, new Comparator() {
      public int compare(Object o1, Object o2) {
        double diff = ((Pair)o1).second - ((Pair)o2).second;
        return(diff < 0 ? 1 : diff > 0 ? -1 : 0);
      }
    });
  int []selected = new int[num];
  if(m_Debug) System.out.println("Sorted list:");
  for(int j=0; j<num; j++){
    if(m_Debug) System.out.println("\t"+pairs[j].second+"\t"+pairs[j].first);
    selected[j] = (int) pairs[j].first;
  }
  return selected;
}

/**
 * Calculate the disagreement in the ensemble over the label of
 * given examples depending on the chosen selection scheme.
 *
 * @param instance unlabeled instance from the current pool
 * @return normalized measure of disagreement
 * @exception Exception if disagreement could not be calculated properly
 */
protected double calculateDisagreement(Instance instance) throws Exception{
  double disagreement;
  switch(m_SelectionScheme){
  case JENSEN_SHANNON:
    disagreement = calcJSDivergence(instance);
    break;
  case MAJORITY:
    disagreement = calcMajorityDis(instance);
    break;
  case EUCLIDEAN:
    disagreement = calcEuclideanDis(instance);
    break;
  case MARGIN:
    // Negate margins so that the descending sort ordering need not change
    // (small margin = high informativeness).
    disagreement = -1.0 * calculateMargin(instance);
    break;
  case BAGGING:
    disagreement = -1.0 * m_SelectionCommittee.calculateMargin(instance);
    break;
  case BOOSTING:
    disagreement = -1.0 * m_SelectionCommittee.calculateMargin(instance);
    break;
  default:
    disagreement = calcMajorityDis(instance);
  }
  return disagreement;
}

/**
 * Calculate the disagreement in the ensemble over the label of
 * given examples. The disagreement is calculated between the
 * posterior probabilities of each member classifier and those of
 * the ensemble.
* @param instance unlabeled instance from the current pool * @return nomalized measure of disagreement * @exception Exception if disagreement could not be calculated properly */ protected double calcJSDivergence(Instance instance) throws Exception{ if (!(m_Classifier instanceof DistributionClassifier)) System.err.println("JS Divergence can only be applied to DistributionClassifiers."); //if(m_Debug) System.out.println("Using JS Divergence."); int size = m_Committee.size(); double [][]probs = new double [size][]; double [] avg = new double [instance.numClasses()]; Classifier curr; for (int i = 0; i < m_Committee.size(); i++) { curr = (Classifier) m_Committee.get(i); probs[i] = ((DistributionClassifier)curr).distributionForInstance(instance); smoothDistribution(probs[i]); for (int j = 0; j < avg.length; j++) avg[j] += probs[i][j]; } Utils.normalize(avg); double disagreement = 0.0; for(int i=0; i<size; i++){ disagreement += calcKLdivergence(probs[i], avg); } disagreement = disagreement/m_Committee.size(); return disagreement; } //Smooth given probability distribution to get rid of zero values protected void smoothDistribution(double []probs){ for(int i=0; i<probs.length; i++) if(probs[i]==0) probs[i] = m_Epsilon; Utils.normalize(probs); } /** * Calculate the KL divergence between two probability distributions. * @param p1 first probability disttribution * @param p1 second probability disttribution * @return the KL divergence between p1 and p2 */ protected double calcKLdivergence(double []p1, double []p2){ double kl = 0.0; for(int i=0; i<p1.length; i++){ kl += p1[i]*Math.log(p2[i]/p1[i]); } kl = -1.0 * kl; return kl; } /** * Calculate the disagreement in the ensemble over the label of * given examples. 
The disagreement is measured as the Euclidean distance between
 * the posterior class probabilities of each member and those of the ensemble
 * (NOTE(review): the original comment said "Jensen-Shannon divergence",
 * a copy-paste error from calcJSDivergence).
 * @param instance unlabeled instance from the current pool
 * @return normalized measure of disagreement
 * @exception Exception if disagreement could not be calculated properly
 */
protected double calcEuclideanDis(Instance instance) throws Exception{
  if (!(m_Classifier instanceof DistributionClassifier))
    System.err.println("Euclidean disagreement can only be applied to DistributionClassifiers.");
  //if(m_Debug) System.out.println("Using Euclidean disagreement.");
  double disagreement = 0.0;
  double []pred = distributionForInstance(instance); // ensemble decision
  Classifier curr;
  // Sum each member's Euclidean distance from the ensemble posterior.
  for (int i = 0; i < m_Committee.size(); i++) {
    curr = (Classifier) m_Committee.get(i);
    double sum = 0.0;
    double []newProbs = ((DistributionClassifier)curr).distributionForInstance(instance);
    for (int j = 0; j < newProbs.length; j++)
      sum += Math.pow((newProbs[j]-pred[j]),2);
    disagreement += Math.sqrt(sum);
  }
  // This normalization step is not necessary for selection, but is useful
  // for comparing disagreement values for different points on the learning
  // curves (as committee size can change).
  disagreement = disagreement/m_Committee.size();
  return disagreement;
}

/**
 * Calculate the disagreement in the ensemble over the label of given examples.
* @param instance unlabeled instance from the current pool * @return nomalized measure of disagreement * @exception Exception if disagreement could not be calculated properly */ protected double calcMajorityDis(Instance instance) throws Exception{ //if(m_Debug) System.out.println("Using majority vote disagreement."); double disagreement = 0.0; double pred = classifyInstance(instance);//ensemble decision Classifier curr; for (int i = 0; i < m_Committee.size(); i++) { curr = (Classifier) m_Committee.get(i); if(curr.classifyInstance(instance) != pred) disagreement++; } disagreement = disagreement/m_Committee.size(); //This normalization step is not necessary for selection, but //is useful for comparing disagreement values for different //points on the learning curves (as committee size can change) return disagreement; } /** * Returns description of the Decorate classifier. * * @return description of the Decorate classifier as a string */ public String toString() { if (m_Committee == null) { return "Decorate: No model built yet."; } StringBuffer text = new StringBuffer(); text.append("Decorate base classifiers: \n\n"); for (int i = 0; i < m_Committee.size(); i++) text.append(((Classifier) m_Committee.get(i)).toString() + "\n\n"); text.append("Number of classifier in the ensemble: "+m_Committee.size()+"\n"); return text.toString(); } /** * Main method for testing this class. * * @param argv the options */ public static void main(String [] argv) { try { System.out.println(Evaluation.evaluateModel(new ActiveDecorate(), argv)); } catch (Exception e) { System.err.println(e.getMessage()); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -