📄 lbr.java
字号:
* * @param index the index of the Instance * */ public boolean getAttIndex(int index) { if(index < 0 || index >= m_NumAtts) throw new IllegalArgumentException("Invalid index value"); return m_AttIndexes[(int)index]; } /** * * Returns the boolean value at the specified index in the Sequential Attribute Indexes array * * @param index the index of the Attribute * */ public int getSequentialAttIndex(int index) { if(index < 0 || index >= m_NumAtts) throw new IllegalArgumentException("Invalid index value"); return m_SequentialAttIndexes[(int)index]; } /** * * Returns the number of instances "in use" * */ public int getNumInstancesSet() { return m_NumInstsSet; } /** * * Returns the number of instances in the dataset * */ public int getNumInstances() { return m_NumInstances; } /** * * Returns the number of instances in the Sequential array * */ public int getSequentialNumInstances() { // will always be the number set as the sequential array is for referencing only return m_NumSeqInstsSet; } /** * * Returns the number of attributes in the dataset * */ public int getNumAttributes() { return m_NumAtts; } /** * * Returns the number of attributes "in use" * */ public int getNumAttributesSet() { return m_NumAttsSet; } /** * * Returns the number of attributes in the Sequential array * */ public int getSequentialNumAttributes() { // will always be the number set as the sequential array is for referencing only return m_NumSeqAttsSet; } /** * * Returns whether or not the Sequential Instance Index requires rebuilding due to a change * */ public boolean isSequentialInstanceIndexValid() { return m_SequentialInstanceIndex_valid; } /** * * Returns whether or not the Sequential Attribute Index requires rebuilding due to a change * */ public boolean isSequentialAttIndexValid() { return m_SequentialAttIndex_valid; } /** * * Sets both the Instance and Attribute indexes to a specified value * */ public void setSequentialDataset(boolean value) { setSequentialInstanceIndex(value); setSequentialAttIndex(value); } /** * * A Sequential Instance index is all those Instances that are set to the specified value placed in a sequential array. * Each value in the sequential array contains the Instance index within the Indexes. * */ public void setSequentialInstanceIndex(boolean value) { if(m_SequentialInstanceIndex_valid == true) return; /* needs to be recalculated */ int size; size = m_NumInstsSet; m_SequentialInstIndexes = new int [(int)size]; int j = 0; for(int i = 0; i < m_NumInstances; i++) { if(m_InstIndexes[i] == value) { m_SequentialInstIndexes[j] = i; j++; } } m_SequentialInstanceIndex_valid = true; m_NumSeqInstsSet = j; } /** * * A Sequential Attribute index is all those Attributes that are set to the specified value placed in a sequential array. * Each value in the sequential array contains the Attribute index within the Indexes * */ public void setSequentialAttIndex(boolean value) { if(m_SequentialAttIndex_valid == true) return; /* needs to be recalculated */ int size; size = m_NumAttsSet; m_SequentialAttIndexes = new int [(int)size]; int j = 0; for(int i = 0; i < m_NumAtts; i++) { if(m_AttIndexes[i] == value) { m_SequentialAttIndexes[j] = i; j++; } } m_SequentialAttIndex_valid = true; m_NumSeqAttsSet = j; } } /* end of Indexes inner-class */ /** All the counts for nominal attributes. */ protected int [][][] m_Counts; /** All the counts for nominal attributes. */ protected int [][][] m_tCounts; /** The prior probabilities of the classes. */ protected int [] m_Priors; /** The prior probabilities of the classes. */ protected int [] m_tPriors; /** number of attributes for the dataset ***/ protected int m_numAtts; /** number of classes for dataset ***/ protected int m_numClasses; /** number of instances in dataset ***/ protected int m_numInsts; /** The set of instances used for current training. */ protected Instances m_Instances = null; // leave-one-out errors on the training dataset. protected int m_Errors; // leave-one-out error flags on the training dataaet. protected boolean [] m_ErrorFlags; // best attribute's index list. maybe as output result protected ArrayList leftHand = new ArrayList(); // significantly lower protected static final double SIGNLOWER = 0.05; // following is defined by wangzh // the number of instances to be classified incorrectly // on the subset. protected boolean [] m_subOldErrorFlags; // the number of instances to be classified incorrectly // besides the subset. protected int m_RemainderErrors = 0; // the number of instance to be processed protected int m_Number = 0; // the Number of Instances to be used in building a classifiers protected int m_NumberOfInstances = 0; // for printing in n-fold cross validation protected boolean m_NCV = false; // index of instances and attributes for the given dataset protected Indexes m_subInstances; // index of instances and attributes for the given dataset protected Indexes tempSubInstances; // probability values array protected double [] posteriorsArray; protected int bestCnt; protected int tempCnt; protected int forCnt; protected int whileCnt; /** * @return a description of the classifier suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Lazy Bayesian Rules Classifier. The naive Bayesian classifier provides a simple and effective approach to classifier learning, but its attribute independence assumption is often violated in the real world. Lazy Bayesian Rules selectively relaxes the independence assumption, achieving lower error rates over a range of learning tasks. LBR defers processing to classification time, making it a highly efficient and accurate classification algorithm when small numbers of objects are to be classified."; } /** * For lazy learning, building classifier is only to prepare their inputs * until classification time. * * @param instances set of instances serving as training data * @exception Exception if the preparation has not been generated. */ public void buildClassifier(Instances instances) throws Exception { int attIndex, i, j; bestCnt = 0; tempCnt = 0; forCnt = 0; whileCnt = 0; m_numAtts = instances.numAttributes(); // LBR requires nominal attibutes for (attIndex = 0; attIndex < m_numAtts; attIndex++) { Attribute attribute = (Attribute) instances.attribute(attIndex); if (attribute.isNumeric()) { throw new Exception("Can't handle numeric attributes! Descritize the dataset prior to using Lazy Bayesian Rules or use the Filtered Classifier"); } } if (instances.classAttribute().isNumeric()) { throw new Exception("LBR: Class is numeric!"); } m_numClasses = instances.numClasses(); if (m_numClasses < 0) { throw new Exception ("Dataset has no class attribute"); } m_numInsts = instances.numInstances(); // Reserve space m_Counts = new int[m_numClasses][m_numAtts][0]; m_Priors = new int[m_numClasses]; m_tCounts = new int[m_numClasses][m_numAtts][0]; m_tPriors = new int[m_numClasses]; m_subOldErrorFlags = new boolean[m_numInsts+1]; m_Instances = instances; m_subInstances = new Indexes(m_numInsts, m_numAtts, true, m_Instances.classIndex()); tempSubInstances = new Indexes(m_numInsts, m_numAtts, true, m_Instances.classIndex()); posteriorsArray = new double[m_numClasses]; // prepare arrays for (attIndex = 0; attIndex < m_numAtts; attIndex++) { Attribute attribute = (Attribute) instances.attribute(attIndex); for (j = 0; j < m_numClasses; j++) { m_Counts[j][attIndex] = new int[attribute.numValues()]; m_tCounts[j][attIndex] = new int[attribute.numValues()]; } } // Compute counts and priors for(i = 0; i < m_numInsts; i++) { Instance instance = (Instance) instances.instance(i); for(attIndex = 0; attIndex < m_numAtts; attIndex++) { m_tCounts[(int)instance.classValue()][attIndex][(int)instance.value(attIndex)]++; } m_tPriors[(int)instance.classValue()]++; } // Step 2: Leave-one-out on the training data set. // get m_Errors and its flags array using leave-one-out. m_ErrorFlags = new boolean[m_numInsts]; m_Errors = leaveOneOut(m_subInstances, m_tCounts, m_tPriors, m_ErrorFlags); if (m_Number == 0) { m_NumberOfInstances = m_Instances.numInstances(); } else { System.out.println(" "); System.out.println("N-Fold Cross Validation: "); m_NCV = true; } } /** * Calculates the class membership probabilities * for the given test instance. * This is the most important method for Lazy Bayesian Rule algorithm. * * @param instance the instance to be classified * @return predicted class probability distribution * @exception Exception if distribution can't be computed */ public double[] distributionForInstance(Instance testInstance) throws Exception { int AIndex, attIndex; int inst; int subAttrIndex = 0; int subInstIndex = 0; int tempInstIndex = 0; int tempAttrIndex = 0; int attributeBest; Instance instance; int subLocalErrors = 0; Instance tempInstance; int tempErrorsBest = 0; boolean [] tempErrorFlagBest = null; int [] tempD_subsetBestInsts = null; int [] tempD_subsetBestAtts = null; Indexes tempD_subsetBest = null; Indexes subInstances = new Indexes(m_numInsts, m_numAtts, true, m_Instances.classIndex()); boolean [] subLocalErrorFlags = new boolean [(int)subInstances.getNumInstances()+1]; // Step 2': Get localErrors, localErrorFlags, and training data set. int localErrors = m_Errors; boolean [] localErrorFlags = (boolean []) m_ErrorFlags.clone(); // The number of errors on New, Not on Old in the subset. int errorsNewNotOld = 0; // The number of errors on Old, Not on New in the subset. int errorsOldNotNew = 0; // Step 3: leftHand.clear(); // Step 4: Beginning Repeat.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -