📄 lbr.java
字号:
/**
 * Calculates the class membership probabilities for the given test
 * instance by lazily growing a Bayesian rule for it.
 *
 * Starting from the full training set, each pass of the loop tries to move
 * one more attribute into the rule's left hand side ("leftHand"): for every
 * remaining candidate attribute it builds the subset of training instances
 * whose value on that attribute matches the test instance's value, trains a
 * local naive Bayes model on that subset (localNaiveBayes), estimates its
 * errors by leave-one-out (leaveOneOut), and remembers the attribute giving
 * the lowest total error — but only if a binomial sign test on the
 * new-vs-old error counts (binomP &lt; SIGNLOWER) says the change is
 * significant. When no attribute qualifies, the loop stops and the class
 * distribution is computed from a naive Bayes model local to the selected
 * subset.
 *
 * @param testInstance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if the distribution cannot be computed
 */
public double[] distributionForInstance(Instance testInstance) throws Exception {
  int inst;
  int subAttrIndex = 0;
  int subInstIndex = 0;
  int tempInstIndex = 0;
  int attributeBest;       // best attribute found in the current pass; -1 = none
  int subLocalErrors = 0;  // leave-one-out errors of the candidate local model
  int tempErrorsBest = 0;  // lowest total error count seen in the current pass
  boolean [] tempErrorFlagBest = null;
  int [] tempD_subsetBestInsts = null;  // instance indexes of the best subset so far
  int [] tempD_subsetBestAtts = null;   // attribute indexes of the best subset so far
  // Current working subset: starts as all instances / all attributes.
  Indexes subInstances = new Indexes(m_numInsts, m_numAtts, true, m_Instances.classIndex());
  boolean [] subLocalErrorFlags = new boolean [(int)subInstances.getNumInstances()+1];
  // Step 2': Get localErrors, localErrorFlags, and training data set.
  // Clone so the per-call bookkeeping never mutates the model's own flags.
  int localErrors = m_Errors;
  boolean [] localErrorFlags = (boolean []) m_ErrorFlags.clone();
  // The number of errors on New, Not on Old in the subset.
  int errorsNewNotOld = 0;
  // The number of errors on Old, Not on New in the subset.
  int errorsOldNotNew = 0;
  // Step 3: start with an empty left hand side.
  leftHand.clear();
  // Step 4: Beginning Repeat.
  // Selecting all the attributes that can be moved to the lefthand.
  // NOTE(review): 5 looks like a minimum-error threshold below which growing
  // the rule further is not worthwhile — confirm against the LBR paper.
  while (localErrors >= 5) {
    attributeBest = -1;
    whileCnt++;  // instrumentation counter
    // Step 5: "best" starts worse than any achievable error count.
    tempErrorsBest = subInstances.getNumInstancesSet() + 1;
    subInstances.setSequentialDataset(true);
    // Step 6: selecting an attribute.
    for (int attr = 0; attr < subInstances.m_NumSeqAttsSet; attr++){
      forCnt++;  // instrumentation counter
      subAttrIndex = subInstances.m_SequentialAttIndexes[attr];
      // Step 7: get the corresponding subset.
      m_RemainderErrors = 0;
      // reset array to true
      for(int i = 0; i < m_numInsts; i++) {
        m_subOldErrorFlags[i] = true;
      }
      // reset indexes to reflect an empty dataset but with the same attrs as another dataset
      tempSubInstances.resetDatasetBasedOn(subInstances);
      // Get subset of the instances and its m_LastSecondErrors:
      // keep instances matching the test instance on the candidate attribute,
      // and count old errors among the excluded remainder.
      for(inst = 0; inst < subInstances.m_NumSeqInstsSet; inst++) {
        subInstIndex = subInstances.m_SequentialInstIndexes[inst];
        if (m_Instances.instance(subInstIndex).value(subAttrIndex) == testInstance.value(subAttrIndex)) {
          // add instance to subset list
          tempSubInstances.setInstanceIndex(subInstIndex, true);
          if (localErrorFlags[subInstIndex] == false ) {
            m_subOldErrorFlags[subInstIndex] = false;
          }
        } else {
          if (localErrorFlags[subInstIndex] == false ) {
            m_RemainderErrors++;
          }
        }
      } // end of for
      // Step 7': only worth evaluating if the attribute actually filters
      // something out (a strictly smaller subset).
      if (tempSubInstances.m_NumInstsSet < subInstances.m_NumInstsSet) {
        // remove attribute from index (it is now constant within the subset)
        tempSubInstances.setAttIndex(subAttrIndex, false);
        // Step 9: create a classifier on the subset.
        // Compute counts and priors
        // create sequential index of instances and attributes that are to be considered
        localNaiveBayes(tempSubInstances);
        subLocalErrors = leaveOneOut(tempSubInstances, m_Counts, m_Priors, subLocalErrorFlags);
        errorsNewNotOld = 0;
        errorsOldNotNew = 0;
        tempSubInstances.setSequentialDataset(true);
        // Tally the error disagreement between new (subset) and old model.
        for(int t_inst = 0; t_inst < tempSubInstances.m_NumSeqInstsSet; t_inst++) {
          tempInstIndex = tempSubInstances.m_SequentialInstIndexes[t_inst];
          if (subLocalErrorFlags[tempInstIndex] == false) {
            // The number of errors on New, Not on Old in the subset.
            if (m_subOldErrorFlags[tempInstIndex] == true) {
              errorsNewNotOld ++;
            }
          } else {
            // The number of errors on Old, Not on New in the subset.
            if(m_subOldErrorFlags[tempInstIndex] == false) {
              errorsOldNotNew ++;
            }
          }
        } //end of for
        // Step 10 and Step 11: subset errors plus old errors on the remainder.
        int tempErrors = subLocalErrors + m_RemainderErrors;
        // Step 12:
        // Step 13: stopping criteria — accept only a lower error count whose
        // improvement is significant under a one-sided binomial sign test.
        if((tempErrors < tempErrorsBest) && (binomP(errorsNewNotOld, errorsNewNotOld + errorsOldNotNew, 0.5 ) < SIGNLOWER)) {
          // Step 14:
          tempCnt++;  // instrumentation counter
          // --------------------------------------------------
          //tempD_subsetBest = new Indexes(tempSubInstances);
          // -------------------------------------------------------------------------------
          // Snapshot the subset's index arrays instead of copying the whole
          // Indexes object (see the commented-out alternative above).
          tempSubInstances.setSequentialDataset(true);
          tempD_subsetBestInsts = (int []) tempSubInstances.m_SequentialInstIndexes.clone();
          tempD_subsetBestAtts = (int []) tempSubInstances.m_SequentialAttIndexes.clone();
          // -------------------------------------------------------------------------------
          // Step 15:
          tempErrorsBest = tempErrors;
          tempErrorFlagBest = (boolean []) subLocalErrorFlags.clone();
          // Step 16:
          attributeBest = subAttrIndex;
        } // end of if
      } // end of if
    } // end of main for
    // Step 20: commit the winning attribute, if any.
    if(attributeBest != -1) {
      bestCnt++;  // instrumentation counter
      // Step 21:
      leftHand.add(testInstance.attribute(attributeBest));
      // ------------------------------------------------
      // Step 22:
      //tempD_subsetBest.setAttIndex(attributeBest, false);
      //subInstances = tempD_subsetBest;
      // ------------------------------------------------
      // Restore the saved best subset into the working Indexes and drop the
      // chosen attribute from further consideration.
      subInstances.setInsts(tempD_subsetBestInsts, true);
      subInstances.setAtts(tempD_subsetBestAtts, true);
      subInstances.setAttIndex(attributeBest, false);
      // -------------------------------------------------
      // Step 25: the best candidate's errors become the baseline for the next pass.
      localErrors = tempErrorsBest;
      localErrorFlags = tempErrorFlagBest;
    } else {
      break;  // no attribute gave a significant improvement — rule is final
    }
  } // end of while
  // Step 27: build the final local model on the selected subset and classify.
  localNaiveBayes(subInstances);
  return localDistributionForInstance(testInstance, subInstances);
}

/**
 * Returns a description of the classifier.
 *
 * @return a description of the classifier as a string.
*/ public String toString() { if (m_Instances == null) { return "Lazy Bayesian Rule: No model built yet."; } try { StringBuffer text = new StringBuffer ("=== LBR Run information ===\n\n"); text.append("Scheme: weka.classifiers.LBR\n"); text.append("Relation: " + m_Instances.attribute(m_Instances.classIndex()).name() + "\n"); text.append("Instances: "+m_Instances.numInstances()+"\n"); text.append("Attributes: "+m_Instances.numAttributes()+"\n"); // Remains are printed by Evaulation.java return text.toString(); } catch (Exception e) { e.printStackTrace(); return "Can't Print Lazy Bayes Rule Classifier!"; } } /** * Leave-one-out strategy. For a given sample data set with n instances, * using (n - 1) instances by leaving one out and tested on the single * remaining case. * This is repeated n times in turn. * The final "Error" is the sum of the instances to be classified * incorrectly. * * @param instanceIndex set of instances serving as training data. * @param counts serving as all the counts of training data. * @param priors serving as the number of instances in each class. * @param errorFlags for the errors * * @return error flag array about each instance. 
* @throws Exception if something goes wrong
**/
public int leaveOneOut(Indexes instanceIndex, int [][][] counts, int [] priors, boolean [] errorFlags) throws Exception {
  // ###### START LEAVE ONE OUT #############
  int tempClassValue;
  double posteriors;
  double sumForPriors;
  double sumForCounts;
  double max = 0;
  int maxIndex = 0;
  int AIndex, attIndex, clss;
  int inst;
  int errors = 0;   // number of misclassified held-out instances
  int instIndex;
  instanceIndex.setSequentialDataset(true);
  int tempInstanceClassValue;
  int [] tempAttributeValues = new int[(int)instanceIndex.m_NumSeqAttsSet+1];
  Instance tempInstance;
  // For each instance: temporarily remove it from counts/priors, classify it
  // with the remaining (n-1) instances, then add it back. The counts arrays
  // are restored exactly, so the caller's model is unchanged on return.
  for(inst = 0; inst < instanceIndex.m_NumSeqInstsSet; inst++) {
    instIndex = instanceIndex.m_SequentialInstIndexes[inst];
    //get the leave-one-out instance
    tempInstance = (Instance) m_Instances.instance(instIndex);
    if (!tempInstance.classIsMissing()) {
      tempInstanceClassValue = (int)tempInstance.classValue();
      // pointer to first index of counts matrix for efficiency
      int [][] countsPointer = counts[tempInstanceClassValue];
      // Compute the counts and priors for (n-1) instances:
      // subtract this instance's contribution from its class's counts.
      for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
        AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
        // Cache the attribute values so the same cells can be restored below.
        tempAttributeValues[attIndex] = (int)tempInstance.value(AIndex);
        countsPointer[AIndex][tempAttributeValues[attIndex]]--;
      }
      priors[tempInstanceClassValue]--;
      max = 0;
      maxIndex= 0;
      // ###### LOCAL CLASSIFY INSTANCE ###########
      sumForPriors = Utils.sum(priors);
      for (clss = 0; clss < m_numClasses; clss++) {
        posteriors = 0.0;
        // Laplace-smoothed class prior.
        posteriors = (priors[clss] + 1) / (sumForPriors + m_numClasses);
        countsPointer = counts[clss];
        // Multiply in Laplace-smoothed conditional probabilities,
        // skipping missing attribute values.
        for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
          AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
          if (!tempInstance.isMissing(AIndex)) {
            sumForCounts = Utils.sum(countsPointer[AIndex]);
            posteriors *= ((countsPointer[AIndex][tempAttributeValues[attIndex]] + 1) / (sumForCounts + (double)tempInstance.attribute(AIndex).numValues()));
          }
        }
        // Track the argmax class.
        if (posteriors > max) {
          maxIndex = clss;
          max = posteriors;
        }
      } // end of for
      if (max > 0) {
        tempClassValue = maxIndex;
      } else {
        // All posteriors were zero — treat the prediction as missing.
        tempClassValue = (int)Instance.missingValue();
      }
      // ###### END LOCAL CLASSIFY INSTANCE ###########
      // Adjudge error. Here using classIndex is incorrect,
      // it is index of the class attribute.
      // errorFlags[i] == true means instance i was classified correctly.
      if(tempClassValue == tempInstanceClassValue){
        errorFlags[instIndex] = true;
      } else {
        errorFlags[instIndex] = false;
        errors++;
      }
      // Restore the counts and priors removed above.
      countsPointer = counts[tempInstanceClassValue];
      for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
        AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
        counts[tempInstanceClassValue][AIndex][tempAttributeValues[attIndex]]++;
      }
      priors[tempInstanceClassValue]++;
    }
  } // end of for
  // ###### END LEAVE ONE OUT #############
  return errors;
}

/**
 * Builds the counts and priors of a simple Naive Bayes classifier.
 * For more information, see<p>
 *
 * Richard Duda and Peter Hart (1973).<i>Pattern
 * Classification and Scene Analysis</i>. Wiley, New York.
 *
 * This method only computes m_Counts and m_Priors.
*
* @param instanceIndex set of instances serving as training data
* @throws Exception if m_Counts and m_Priors have not been
* generated successfully
*/
public void localNaiveBayes(Indexes instanceIndex) throws Exception {
  int attIndex = 0;
  int i, AIndex;
  int attVal = 0;
  int classVal = 0;
  Instance instance;
  instanceIndex.setSequentialDataset(true);
  // reset local counts (and priors) for every class before re-counting;
  // m_Counts/m_Priors are shared member arrays reused across calls.
  for(classVal = 0; classVal < m_numClasses; classVal++) {
    // counts pointer mcTimesaver
    int [][] countsPointer1 = m_Counts[classVal];
    for(attIndex = 0; attIndex < m_numAtts; attIndex++) {
      Attribute attribute = m_Instances.attribute(attIndex);
      // love those pointers for saving time
      int [] countsPointer2 = countsPointer1[attIndex];
      for(attVal = 0; attVal < attribute.numValues(); attVal++) {
        countsPointer2[attVal] = 0;
      }
    }
    m_Priors[classVal] = 0;
  }
  // Accumulate counts over only the instances/attributes selected in
  // instanceIndex (the sequential index arrays set above).
  for(i = 0; i < instanceIndex.m_NumSeqInstsSet; i++) {
    instance = (Instance) m_Instances.instance(instanceIndex.m_SequentialInstIndexes[i]);
    for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
      AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
      m_Counts[(int)instance.classValue()][AIndex][(int)instance.value(AIndex)]++;
    }
    m_Priors[(int)instance.classValue()]++;
  }
}

/**
 * Calculates the class membership probabilities
 * for the given test instance.
 *
 * @param instance the instance to be classified
 * @param instanceIndex the subset of instances/attributes defining the local model
 *
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {
  double sumForPriors = 0;
  double sumForCounts = 0;
  int attIndex, AIndex;
  int numClassesOfInstance = instance.numClasses();
  sumForPriors = 0;
  sumForCounts = 0;
  instanceIndex.setSequentialDataset(true);
  // Calculate all of conditional probabilities.
// Laplace smoothing: add one count per class to the prior denominator.
sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance;
for (int j = 0; j < numClassesOfInstance; j++) {
  // pointer to counts to make access more efficient in loop
  int [][] countsPointer = m_Counts[j];
  // Smoothed class prior.
  posteriorsArray[j] = (m_Priors[j] + 1) / (sumForPriors);
  for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
    AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
    // NOTE(review): sumForCounts is computed even when the value is missing
    // and then unused — wasted work, though harmless.
    sumForCounts = Utils.sum(countsPointer[AIndex]);
    if (!instance.isMissing(AIndex)) {
      // Smoothed conditional probability of this attribute value given class j.
      posteriorsArray[j] *= ((countsPointer[AIndex][(int)instance.value(AIndex)] + 1) / (sumForCounts + (double)instance.attribute(AIndex).numValues()));
    }
  }
}
// Normalize probabilities so they sum to one.
Utils.normalize(posteriorsArray);
return posteriorsArray;
}

/**
 * Significance test
 * binomp: one-sided binomial tail probability.
 *
 * @param r the number of observed "successes"
 * @param n the number of trials
 * @param p the probability of a success on one trial
 * @return returns the probability of obtaining r or fewer out of n
 * if the probability of an event is p.
 * @throws Exception if computation fails
 */
public double binomP(double r, double n, double p) throws Exception {
  // Degenerate case: every trial succeeded.
  if (n == r) return 1.0;
  // Binomial CDF expressed via the regularized incomplete beta function.
  return Statistics.incompleteBeta(n-r, r+1.0, 1.0-p);
}

/**
 * Main method for testing this class.
 *
 * @param argv the options
 */
public static void main(String [] argv) {
  runClassifier(new LBR(), argv);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -