📄 lbr.java
字号:
tempErrorsBest = subInstances.getNumInstancesSet() + 1;
subInstances.setSequentialDataset(true);
// Step 6: selecting an attribute.
for (int attr = 0; attr < subInstances.m_NumSeqAttsSet; attr++){
forCnt++;
subAttrIndex = subInstances.m_SequentialAttIndexes[attr];
// Step 7: get the corresponding subset.
m_RemainderErrors = 0;
// reset array to true
for(int i = 0; i < m_numInsts; i++) {
m_subOldErrorFlags[i] = true;
}
// reset indexes to reflect an empty dataset but with the same attrs as another dataset
tempSubInstances.resetDatasetBasedOn(subInstances);
// Get subset of the instances and its m_LastSecondErrors
for(inst = 0; inst < subInstances.m_NumSeqInstsSet; inst++) {
subInstIndex = subInstances.m_SequentialInstIndexes[inst];
if (m_Instances.instance(subInstIndex).value(subAttrIndex) == testInstance.value(subAttrIndex)) {
// add instance to subset list
tempSubInstances.setInstanceIndex(subInstIndex, true);
if (localErrorFlags[subInstIndex] == false ) {
m_subOldErrorFlags[subInstIndex] = false;
}
}
else {
if (localErrorFlags[subInstIndex] == false ) {
m_RemainderErrors++;
}
}
} // end of for
// Step 7':
if (tempSubInstances.m_NumInstsSet < subInstances.m_NumInstsSet) {
// remove attribute from index
tempSubInstances.setAttIndex(subAttrIndex, false);
// Step 9: create a classifier on the subset.
// Compute counts and priors
// create sequential index of instances and attributes that are to be considered
localNaiveBayes(tempSubInstances);
subLocalErrors = leaveOneOut(tempSubInstances, m_Counts, m_Priors, subLocalErrorFlags);
errorsNewNotOld = 0;
errorsOldNotNew = 0;
tempSubInstances.setSequentialDataset(true);
for(int t_inst = 0; t_inst < tempSubInstances.m_NumSeqInstsSet; t_inst++) {
tempInstIndex = tempSubInstances.m_SequentialInstIndexes[t_inst];
if (subLocalErrorFlags[tempInstIndex] == false) {
// The number of errors on New, Not on Old in the subset.
if (m_subOldErrorFlags[tempInstIndex] == true) {
errorsNewNotOld ++;
}
} else {
// The number of errors on Old, Not on New in the subset.
if(m_subOldErrorFlags[tempInstIndex] == false) {
errorsOldNotNew ++;
}
}
} //end of for
// Step 10 and Step 11:
int tempErrors = subLocalErrors + m_RemainderErrors;
// Step 12:
// Step 13: stopping criteria.
if((tempErrors < tempErrorsBest) && (binomP(errorsNewNotOld, errorsNewNotOld + errorsOldNotNew, 0.5 ) < SIGNLOWER)) {
// Step 14:
tempCnt++;
// --------------------------------------------------
//tempD_subsetBest = new Indexes(tempSubInstances);
// -------------------------------------------------------------------------------
tempSubInstances.setSequentialDataset(true);
tempD_subsetBestInsts = (int []) tempSubInstances.m_SequentialInstIndexes.clone();
tempD_subsetBestAtts = (int []) tempSubInstances.m_SequentialAttIndexes.clone();
// -------------------------------------------------------------------------------
// Step 15:
tempErrorsBest = tempErrors;
tempErrorFlagBest = (boolean []) subLocalErrorFlags.clone();
// Step 16:
attributeBest = subAttrIndex;
} // end of if
} // end of if
} // end of main for
// Step 20:
if(attributeBest != -1) {
bestCnt++;
// Step 21:
leftHand.add(testInstance.attribute(attributeBest));
// ------------------------------------------------
// Step 22:
//tempD_subsetBest.setAttIndex(attributeBest, false);
//subInstances = tempD_subsetBest;
// ------------------------------------------------
subInstances.setInsts(tempD_subsetBestInsts, true);
subInstances.setAtts(tempD_subsetBestAtts, true);
subInstances.setAttIndex(attributeBest, false);
// -------------------------------------------------
// Step 25:
localErrors = tempErrorsBest;
localErrorFlags = tempErrorFlagBest;
} else {
break;
}
} // end of while
// Step 27:
localNaiveBayes(subInstances);
return localDistributionForInstance(testInstance, subInstances);
}
/**
 * Returns a description of the classifier.
 *
 * @return a description of the classifier as a string.
 */
public String toString() {
  // No model is built until the first instance is classified (lazy learner).
  if (m_Instances == null) {
    return "Lazy Bayesian Rule: No model built yet.";
  }
  try {
    StringBuffer text = new StringBuffer("=== LBR Run information ===\n\n");
    text.append("Scheme: weka.classifiers.LBR\n");
    // BUG FIX: previously printed the *class attribute's name* here.
    // The "Relation" line must report the training data's relation name.
    text.append("Relation: " + m_Instances.relationName() + "\n");
    text.append("Instances: " + m_Instances.numInstances() + "\n");
    text.append("Attributes: " + m_Instances.numAttributes() + "\n");
    // Remaining statistics are printed by Evaluation.java
    return text.toString();
  } catch (Exception e) {
    e.printStackTrace();
    return "Can't Print Lazy Bayes Rule Classifier!";
  }
}
/**
 * Leave-one-out evaluation over the instance subset described by the
 * supplied index structure. For each of the n selected instances in turn,
 * its contribution is removed from the counts and priors, the instance is
 * classified by the naive Bayes model of the remaining (n - 1) instances,
 * and the counts/priors are then restored. The arrays are therefore
 * unchanged on exit. The final error count is the number of instances
 * classified incorrectly.
 *
 * @param instanceIndex indexes of the instances and attributes to use.
 * @param counts attribute-value counts of the training data, indexed as
 *        [class][attribute][value]; temporarily modified, restored on exit.
 * @param priors number of training instances in each class; temporarily
 *        modified, restored on exit.
 * @param errorFlags output array indexed by instance: set to false where
 *        the instance was misclassified, true where it was correct.
 *        Instances with a missing class are left untouched.
 *
 * @return the number of instances classified incorrectly.
 *
 **/
public int leaveOneOut(Indexes instanceIndex, int [][][] counts, int [] priors, boolean [] errorFlags) throws Exception {
// ###### START LEAVE ONE OUT #############
int tempClassValue;
double posteriors;
double sumForPriors;
double sumForCounts;
double max = 0;
int maxIndex = 0;
int AIndex, attIndex, clss;
int inst;
int errors = 0;
int instIndex;
Instance instance; // NOTE(review): declared but never used in this method
instanceIndex.setSequentialDataset(true);
int tempInstanceClassValue;
// +1 slack so the array is non-empty even with zero selected attributes
int [] tempAttributeValues = new int[(int)instanceIndex.m_NumSeqAttsSet+1];
Instance tempInstance;
for(inst = 0; inst < instanceIndex.m_NumSeqInstsSet; inst++) {
instIndex = instanceIndex.m_SequentialInstIndexes[inst];
//get the leave-one-out instance
tempInstance = (Instance) m_Instances.instance(instIndex);
// Instances with a missing class cannot be scored; skip them entirely.
if (!tempInstance.classIsMissing()) {
tempInstanceClassValue = (int)tempInstance.classValue();
// pointer to first index of counts matrix for efficiency
int [][] countsPointer = counts[tempInstanceClassValue];
// Remove this instance's contribution so the model reflects (n-1) instances.
// Its attribute values are cached for the symmetric restore below.
for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
tempAttributeValues[attIndex] = (int)tempInstance.value(AIndex);
countsPointer[AIndex][tempAttributeValues[attIndex]]--;
}
priors[tempInstanceClassValue]--;
max = 0;
maxIndex= 0;
// ###### LOCAL CLASSIFY INSTANCE ###########
// Naive Bayes with Laplace correction: pick the class with the highest
// (unnormalized) posterior. Normalization is unnecessary for argmax.
sumForPriors = Utils.sum(priors);
for (clss = 0; clss < m_numClasses; clss++) {
posteriors = 0.0;
// Laplace-corrected class prior.
posteriors = (priors[clss] + 1) / (sumForPriors + m_numClasses);
countsPointer = counts[clss];
for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
// Missing attribute values contribute nothing to the product.
if (!tempInstance.isMissing(AIndex)) {
sumForCounts = Utils.sum(countsPointer[AIndex]);
// Laplace-corrected conditional probability P(value | class).
posteriors *= ((countsPointer[AIndex][tempAttributeValues[attIndex]] + 1) / (sumForCounts + (double)tempInstance.attribute(AIndex).numValues()));
}
}
if (posteriors > max) {
maxIndex = clss;
max = posteriors;
}
} // end of for
// max == 0 means every posterior underflowed to zero: no prediction.
if (max > 0) {
tempClassValue = maxIndex;
} else {
tempClassValue = (int)tempInstance.missingValue();
}
// ###### END LOCAL CLASSIFY INSTANCE ###########
// Record whether the held-out instance was classified correctly.
// (Comparison is against the instance's own class value, not classIndex.)
if(tempClassValue == tempInstanceClassValue){
errorFlags[instIndex] = true;
} else {
errorFlags[instIndex] = false;
errors++;
}
// Restore the contribution removed above (indexed by the TRUE class,
// mirroring the earlier decrement) so counts/priors are unchanged on exit.
countsPointer = counts[tempInstanceClassValue];
for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
counts[tempInstanceClassValue][AIndex][tempAttributeValues[attIndex]]++;
}
priors[tempInstanceClassValue]++;
}
} // end of for
// ###### END LEAVE ONE OUT #############
return errors;
}
/**
 * Builds the counts (m_Counts) and class priors (m_Priors) of a simple
 * naive Bayes model from the instances and attributes selected by the
 * given index structure. For more information, see<p>
 *
 * Richard Duda and Peter Hart (1973).<i>Pattern
 * Classification and Scene Analysis</i>. Wiley, New York.
 *
 * This method only computes m_Counts and m_Priors.
 *
 * @param instanceIndex indexes of the instances and attributes to count.
 * @exception Exception if m_Counts and m_Priors have not been
 * generated successfully
 */
public void localNaiveBayes(Indexes instanceIndex) throws Exception {
  int attIndex;
  int AIndex;
  int attVal;
  Instance instance;
  instanceIndex.setSequentialDataset(true);
  // Reset ALL counts and priors to zero; only the selected instances and
  // attributes are accumulated below, but every cell must start clean.
  for (int classVal = 0; classVal < m_numClasses; classVal++) {
    // counts pointer saves repeated multi-dimensional indexing
    int [][] countsPointer1 = m_Counts[classVal];
    for (attIndex = 0; attIndex < m_numAtts; attIndex++) {
      Attribute attribute = m_Instances.attribute(attIndex);
      int [] countsPointer2 = countsPointer1[attIndex];
      for (attVal = 0; attVal < attribute.numValues(); attVal++) {
        countsPointer2[attVal] = 0;
      }
    }
    m_Priors[classVal] = 0;
  }
  // Accumulate counts and priors over the selected instances/attributes.
  for (int i = 0; i < instanceIndex.m_NumSeqInstsSet; i++) {
    instance = (Instance) m_Instances.instance(instanceIndex.m_SequentialInstIndexes[i]);
    // Cast the class value once per instance instead of per attribute.
    int classValue = (int) instance.classValue();
    for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
      AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
      m_Counts[classValue][AIndex][(int) instance.value(AIndex)]++;
    }
    m_Priors[classValue]++;
  }
}
/**
 * Calculates the class membership probabilities for the given test
 * instance, using the current naive Bayes counts (m_Counts) and priors
 * (m_Priors) restricted to the attributes selected in instanceIndex.
 *
 * @param instance the instance to be classified
 * @param instanceIndex indexes of the attributes used by the local model.
 *
 * @return predicted class probability distribution (normalized)
 * @exception Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {
  double sumForCounts;
  int attIndex, AIndex;
  int numClassesOfInstance = instance.numClasses();
  instanceIndex.setSequentialDataset(true);
  // Laplace-corrected prior denominator: total instances plus one per class.
  double sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance;
  for (int j = 0; j < numClassesOfInstance; j++) {
    // pointer to counts to make access more efficient in loop
    int [][] countsPointer = m_Counts[j];
    // Laplace-corrected class prior P(class j).
    posteriorsArray[j] = (m_Priors[j] + 1) / (sumForPriors);
    for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
      AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
      sumForCounts = Utils.sum(countsPointer[AIndex]);
      // Missing attribute values contribute nothing to the product.
      if (!instance.isMissing(AIndex)) {
        // Laplace-corrected conditional probability P(value | class j).
        posteriorsArray[j] *= ((countsPointer[AIndex][(int)instance.value(AIndex)] + 1) / (sumForCounts + (double)instance.attribute(AIndex).numValues()));
      }
    }
  }
  // Normalize probabilities so the posteriors sum to one.
  Utils.normalize(posteriorsArray);
  return posteriorsArray;
}
/**
 * Significance test: computes the probability of obtaining r or fewer
 * successes out of n trials when the probability of a single success is p
 * (the lower tail of the binomial distribution, evaluated via the
 * regularized incomplete beta function).
 *
 * @param r the number of successes observed.
 * @param n the number of trials.
 * @param p the probability of a success on one trial.
 * @return the probability of obtaining r or fewer out of n
 * if the probability of an event is p.
 * @throws Exception if the probability cannot be computed.
 */
public double binomP(double r, double n, double p) throws Exception {
  // Degenerate case: r == n means the whole distribution is covered.
  if (n == r) {
    return 1.0;
  }
  // Binomial CDF identity: P(X <= r) = I_{1-p}(n - r, r + 1).
  return Statistics.incompleteBeta(n - r, r + 1.0, 1.0 - p);
}
/**
 * Main method for testing this class from the command line.
 *
 * @param argv the options
 */
public static void main(String [] argv) {
  try {
    // Build, evaluate and print results for an LBR classifier
    // using the standard Weka evaluation driver.
    System.out.println(Evaluation.evaluateModel(new LBR(), argv));
  } catch (Exception e) {
    System.err.println(e.getMessage());
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -