📄 lbr.java
    // Selecting all the attributes that can be moved to the left-hand side.
    while (localErrors >= 5) {
      attributeBest = -1;
      whileCnt++;

      // Step 5:
      tempErrorsBest = subInstances.getNumInstancesSet() + 1;
      subInstances.setSequentialDataset(true);

      // Step 6: selecting an attribute.
      for (int attr = 0; attr < subInstances.m_NumSeqAttsSet; attr++) {
        forCnt++;
        subAttrIndex = subInstances.m_SequentialAttIndexes[attr];

        // Step 7: get the corresponding subset.
        m_RemainderErrors = 0;

        // Reset the array to true.
        for (int i = 0; i < m_numInsts; i++) {
          m_subOldErrorFlags[i] = true;
        }

        // Reset indexes to reflect an empty dataset, but with the same
        // attributes as another dataset.
        tempSubInstances.resetDatasetBasedOn(subInstances);

        // Get the subset of the instances and its m_LastSecondErrors.
        for (inst = 0; inst < subInstances.m_NumSeqInstsSet; inst++) {
          subInstIndex = subInstances.m_SequentialInstIndexes[inst];
          if (m_Instances.instance(subInstIndex).value(subAttrIndex)
              == testInstance.value(subAttrIndex)) {
            // Add the instance to the subset list.
            tempSubInstances.setInstanceIndex(subInstIndex, true);
            if (localErrorFlags[subInstIndex] == false) {
              m_subOldErrorFlags[subInstIndex] = false;
            }
          } else {
            if (localErrorFlags[subInstIndex] == false) {
              m_RemainderErrors++;
            }
          }
        } // end of for

        // Step 7': only consider proper subsets.
        if (tempSubInstances.m_NumInstsSet < subInstances.m_NumInstsSet) {
          // Remove the attribute from the index.
          tempSubInstances.setAttIndex(subAttrIndex, false);

          // Step 9: create a classifier on the subset.
          // Compute counts and priors over the sequential index of
          // instances and attributes that are to be considered.
          localNaiveBayes(tempSubInstances);
          subLocalErrors = leaveOneOut(tempSubInstances, m_Counts,
                                       m_Priors, subLocalErrorFlags);

          errorsNewNotOld = 0;
          errorsOldNotNew = 0;
          tempSubInstances.setSequentialDataset(true);
          for (int t_inst = 0; t_inst < tempSubInstances.m_NumSeqInstsSet; t_inst++) {
            tempInstIndex = tempSubInstances.m_SequentialInstIndexes[t_inst];
            if (subLocalErrorFlags[tempInstIndex] == false) {
              // The number of errors on New, not on Old, in the subset.
              if (m_subOldErrorFlags[tempInstIndex] == true) {
                errorsNewNotOld++;
              }
            } else {
              // The number of errors on Old, not on New, in the subset.
              if (m_subOldErrorFlags[tempInstIndex] == false) {
                errorsOldNotNew++;
              }
            }
          } // end of for

          // Step 10 and Step 11:
          int tempErrors = subLocalErrors + m_RemainderErrors;

          // Step 12:
          // Step 13: stopping criteria.
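          // Detail of the test below: a candidate attribute is accepted
          // only if it lowers the total error AND passes a one-tailed
          // sign test. binomP gives the probability of observing at most
          // errorsNewNotOld "new-only" errors out of all
          // errorsNewNotOld + errorsOldNotNew disagreements under a fair
          // coin (p = 0.5); the improvement counts as significant when
          // that probability falls below SIGNLOWER (a significance
          // threshold defined elsewhere in this class).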
          if ((tempErrors < tempErrorsBest)
              && (binomP(errorsNewNotOld, errorsNewNotOld + errorsOldNotNew, 0.5)
                  < SIGNLOWER)) {
            // Step 14:
            tempCnt++;
            // --------------------------------------------------
            //tempD_subsetBest = new Indexes(tempSubInstances);
            // --------------------------------------------------
            tempSubInstances.setSequentialDataset(true);
            tempD_subsetBestInsts =
                (int []) tempSubInstances.m_SequentialInstIndexes.clone();
            tempD_subsetBestAtts =
                (int []) tempSubInstances.m_SequentialAttIndexes.clone();
            // --------------------------------------------------
            // Step 15:
            tempErrorsBest = tempErrors;
            tempErrorFlagBest = (boolean []) subLocalErrorFlags.clone();
            // Step 16:
            attributeBest = subAttrIndex;
          } // end of if
        } // end of if
      } // end of main for

      // Step 20:
      if (attributeBest != -1) {
        bestCnt++;
        // Step 21: move the best attribute to the left-hand side of the rule.
        leftHand.add(testInstance.attribute(attributeBest));
        // --------------------------------------------------
        // Step 22:
        //tempD_subsetBest.setAttIndex(attributeBest, false);
        //subInstances = tempD_subsetBest;
        // --------------------------------------------------
        subInstances.setInsts(tempD_subsetBestInsts, true);
        subInstances.setAtts(tempD_subsetBestAtts, true);
        subInstances.setAttIndex(attributeBest, false);
        // --------------------------------------------------
        // Step 25:
        localErrors = tempErrorsBest;
        localErrorFlags = tempErrorFlagBest;
      } else {
        break;
      }
    } // end of while

    // Step 27:
    localNaiveBayes(subInstances);
    return localDistributionForInstance(testInstance, subInstances);
  }

  /**
   * Returns a description of the classifier.
   *
   * @return a description of the classifier as a string
   */
  public String toString() {
    if (m_Instances == null) {
      return "Lazy Bayesian Rule: No model built yet.";
    }
    try {
      StringBuffer text = new StringBuffer("=== LBR Run information ===\n\n");
      text.append("Scheme:     weka.classifiers.LBR\n");
      text.append("Relation:   " + m_Instances.relationName() + "\n");
      text.append("Instances:  " + m_Instances.numInstances() + "\n");
      text.append("Attributes: " + m_Instances.numAttributes() + "\n");
      // The remaining output is printed by Evaluation.java.
      return text.toString();
    } catch (Exception e) {
      e.printStackTrace();
      return "Can't print the Lazy Bayesian Rule classifier!";
    }
  }

  /**
   * Leave-one-out strategy. For a given dataset of n instances, a
   * classifier is built on (n - 1) instances, leaving one out, and tested
   * on the single remaining case. This is repeated n times, once per
   * instance. The returned "error" is the number of instances classified
   * incorrectly.
   *
   * @param instanceIndex index set of instances serving as training data
   * @param counts all the counts of the training data
   * @param priors the number of training instances in each class
   * @param errorFlags receives a per-instance flag: true if the instance
   *        was classified correctly, false otherwise
   * @return the number of instances classified incorrectly
   */
  public int leaveOneOut(Indexes instanceIndex, int [][][] counts,
                         int [] priors, boolean [] errorFlags) throws Exception {
    // ###### START LEAVE ONE OUT #############
    int tempClassValue;
    double posteriors;
    double sumForPriors;
    double sumForCounts;
    double max = 0;
    int maxIndex = 0;
    int AIndex, attIndex, clss;
    int inst;
    int errors = 0;
    int instIndex;
    Instance instance;

    instanceIndex.setSequentialDataset(true);
    int tempInstanceClassValue;
    int [] tempAttributeValues = new int[(int) instanceIndex.m_NumSeqAttsSet + 1];
    Instance tempInstance;

    for (inst = 0; inst < instanceIndex.m_NumSeqInstsSet; inst++) {
      instIndex = instanceIndex.m_SequentialInstIndexes[inst];
      // Get the leave-one-out instance.
      tempInstance = (Instance) m_Instances.instance(instIndex);
      if (!tempInstance.classIsMissing()) {
        tempInstanceClassValue = (int) tempInstance.classValue();

        // Compute the counts and priors for the (n - 1) instances by
        // subtracting the held-out instance from the totals.
        for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
          AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
          tempAttributeValues[attIndex] = (int) tempInstance.value(AIndex);
          counts[tempInstanceClassValue][AIndex][tempAttributeValues[attIndex]]--;
        }
        priors[tempInstanceClassValue]--;

        max = 0;
        maxIndex = 0;

        // ###### LOCAL CLASSIFY INSTANCE ###########
        sumForPriors = Utils.sum(priors);
        for (clss = 0; clss < m_numClasses; clss++) {
          posteriors = (priors[clss] + 1) / (sumForPriors + m_numClasses);
          for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
            AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
            if (!tempInstance.isMissing(AIndex)) {
              sumForCounts = Utils.sum(counts[clss][AIndex]);
              posteriors *= ((counts[clss][AIndex][tempAttributeValues[attIndex]] + 1)
                  / (sumForCounts + (double) tempInstance.attribute(AIndex).numValues()));
            }
          }
          if (posteriors > max) {
            maxIndex = clss;
            max = posteriors;
          }
        } // end of for

        if (max > 0) {
          tempClassValue = maxIndex;
        } else {
          tempClassValue = (int) tempInstance.missingValue();
        }
        // ###### END LOCAL CLASSIFY INSTANCE ###########

        // Record whether the prediction was correct. Note that using
        // classIndex() here would be incorrect: it is the index of the
        // class attribute, not its value.
        if (tempClassValue == tempInstanceClassValue) {
          errorFlags[instIndex] = true;
        } else {
          errorFlags[instIndex] = false;
          errors++;
        }

        // Restore the counts and priors for the held-out instance.
        for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
          AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
          counts[tempInstanceClassValue][AIndex][tempAttributeValues[attIndex]]++;
        }
        priors[tempInstanceClassValue]++;
      }
    } // end of for
    // ###### END LEAVE ONE OUT #############
    return errors;
  }
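  /*
   * Note: the local classification inside leaveOneOut() above and
   * localDistributionForInstance() below both score a class c with the
   * same Laplace-smoothed naive Bayes estimate over m_Counts/m_Priors:
   *
   *   P(c | x)  is proportional to
   *       (priors[c] + 1) / (sum(priors) + numClasses)
   *     * product over attributes a of
   *       (counts[c][a][x_a] + 1) / (sum_v counts[c][a][v] + numValues(a))
   *
   * Attributes whose value is missing in x are skipped in the product.
   */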
  /**
   * Builds the counts and priors for a simple Naive Bayes classifier.
   * For more information, see<p>
   *
   * Richard Duda and Peter Hart (1973). <i>Pattern Classification and
   * Scene Analysis</i>. Wiley, New York.
   *
   * This method only computes m_Counts and m_Priors.
   *
   * @param instanceIndex index set of instances serving as training data
   * @exception Exception if m_Counts and m_Priors have not been
   *            generated successfully
   */
  public void localNaiveBayes(Indexes instanceIndex) throws Exception {
    int attIndex = 0;
    int i, v, AIndex;
    int attVal = 0;
    int classVal = 0;
    Instance instance;

    instanceIndex.setSequentialDataset(true);

    // Reset the local counts.
    for (classVal = 0; classVal < m_numClasses; classVal++) {
      for (attIndex = 0; attIndex < m_numAtts; attIndex++) {
        Attribute attribute = m_Instances.attribute(attIndex);
        for (attVal = 0; attVal < attribute.numValues(); attVal++) {
          m_Counts[classVal][attIndex][attVal] = 0;
        }
      }
      m_Priors[classVal] = 0;
    }

    // Accumulate counts and priors over the selected instances and attributes.
    for (i = 0; i < instanceIndex.m_NumSeqInstsSet; i++) {
      instance = (Instance) m_Instances.instance(instanceIndex.m_SequentialInstIndexes[i]);
      for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
        AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
        m_Counts[(int) instance.classValue()][AIndex][(int) instance.value(AIndex)]++;
      }
      m_Priors[(int) instance.classValue()]++;
    }
  }

  /**
   * Calculates the class membership probabilities for the given test
   * instance.
   *
   * @param instance the instance to be classified
   * @param instanceIndex index set of the instances and attributes to use
   * @return the predicted class probability distribution
   * @exception Exception if the distribution can't be computed
   */
  public double[] localDistributionForInstance(Instance instance,
                                               Indexes instanceIndex) throws Exception {
    double sumForPriors = 0;
    double sumForCounts = 0;
    int attIndex, AIndex;
    int numClassesOfInstance = instance.numClasses();

    instanceIndex.setSequentialDataset(true);

    // Calculate all of the conditional probabilities.
    sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance;
    for (int j = 0; j < numClassesOfInstance; j++) {
      posteriorsArray[j] = (m_Priors[j] + 1) / sumForPriors;
      for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
        AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
        sumForCounts = Utils.sum(m_Counts[j][AIndex]);
        if (!instance.isMissing(AIndex)) {
          posteriorsArray[j] *= ((m_Counts[j][AIndex][(int) instance.value(AIndex)] + 1)
              / (sumForCounts + (double) instance.attribute(AIndex).numValues()));
        }
      }
    }

    // Normalize the probabilities.
    Utils.normalize(posteriorsArray);
    return posteriorsArray;
  }

  /**
   * Significance test (binomP): returns the probability of obtaining r
   * or fewer successes out of n trials if the probability of a success
   * is p.
   *
   * @param r the number of successes
   * @param n the number of trials
   * @param p the probability of a success
   * @return the lower tail of the binomial distribution
   */
  public double binomP(double r, double n, double p) throws Exception {
    if (n == r) {
      return 1.0;
    }
    return Statistics.incompleteBeta(n - r, r + 1.0, 1.0 - p);
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the options
   */
  public static void main(String [] argv) {
    Classifier scheme;

    try {
      scheme = new LBR();
      System.out.println(Evaluation.evaluateModel(scheme, argv));
    } catch (Exception e) {
      System.err.println(e.getMessage());
    }
  }
}
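/*
 * Usage sketch (not part of the original source): exercises LBR through
 * Weka's standard evaluation harness, exactly as main() above does with
 * command-line options, and sanity-checks the binomial sign test. The
 * ARFF file name is a hypothetical placeholder for any dataset with
 * nominal attributes.
 */
class LBRDemo {

  public static void main(String [] args) throws Exception {
    // Sanity check of the sign test used at Step 13: the probability of
    // at most 1 head in 10 fair coin flips is 11/1024, roughly 0.0107.
    System.out.println("binomP(1, 10, 0.5) = " + new LBR().binomP(1, 10, 0.5));

    // Train and test on a nominal dataset; equivalent to running:
    //   java LBR -t weather.nominal.arff
    String [] options = { "-t", "weather.nominal.arff" };
    System.out.println(Evaluation.evaluateModel(new LBR(), options));
  }
}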