📄 lbr.java
    // Selecting all the attributes that can be moved to the left-hand side.
    while (localErrors >= 5) {
      attributeBest = -1;
      whileCnt++;

      // Step 5:
      tempErrorsBest = subInstances.getNumInstancesSet() + 1;
      subInstances.setSequentialDataset(true);

      // Step 6: selecting an attribute.
      for (int attr = 0; attr < subInstances.m_NumSeqAttsSet; attr++) {
        forCnt++;
        subAttrIndex = subInstances.m_SequentialAttIndexes[attr];

        // Step 7: get the corresponding subset.
        m_RemainderErrors = 0;

        // Reset the array to true.
        for (int i = 0; i < m_numInsts; i++) {
          m_subOldErrorFlags[i] = true;
        }

        // Reset indexes to reflect an empty dataset, but with the same
        // attributes as another dataset.
        tempSubInstances.resetDatasetBasedOn(subInstances);

        // Get the subset of the instances and its m_LastSecondErrors.
        for (inst = 0; inst < subInstances.m_NumSeqInstsSet; inst++) {
          subInstIndex = subInstances.m_SequentialInstIndexes[inst];
          if (m_Instances.instance(subInstIndex).value(subAttrIndex)
              == testInstance.value(subAttrIndex)) {
            // Add the instance to the subset list.
            tempSubInstances.setInstanceIndex(subInstIndex, true);
            if (localErrorFlags[subInstIndex] == false) {
              m_subOldErrorFlags[subInstIndex] = false;
            }
          } else {
            if (localErrorFlags[subInstIndex] == false) {
              m_RemainderErrors++;
            }
          }
        } // end of for

        // Step 7': only consider proper subsets.
        if (tempSubInstances.m_NumInstsSet < subInstances.m_NumInstsSet) {
          // Remove the attribute from the index.
          tempSubInstances.setAttIndex(subAttrIndex, false);

          // Step 9: create a classifier on the subset.
          // Compute counts and priors over the sequential index of
          // instances and attributes that are to be considered.
          localNaiveBayes(tempSubInstances);
          subLocalErrors = leaveOneOut(tempSubInstances, m_Counts,
                                       m_Priors, subLocalErrorFlags);

          errorsNewNotOld = 0;
          errorsOldNotNew = 0;
          tempSubInstances.setSequentialDataset(true);
          for (int t_inst = 0; t_inst < tempSubInstances.m_NumSeqInstsSet; t_inst++) {
            tempInstIndex = tempSubInstances.m_SequentialInstIndexes[t_inst];
            if (subLocalErrorFlags[tempInstIndex] == false) {
              // The number of errors on New, not on Old, in the subset.
              if (m_subOldErrorFlags[tempInstIndex] == true) {
                errorsNewNotOld++;
              }
            } else {
              // The number of errors on Old, not on New, in the subset.
              if (m_subOldErrorFlags[tempInstIndex] == false) {
                errorsOldNotNew++;
              }
            }
          } // end of for

          // Step 10 and Step 11:
          int tempErrors = subLocalErrors + m_RemainderErrors;

          // Step 12:
          // Step 13: stopping criteria.
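          // Detail of the test below: a candidate attribute is accepted
          // only if it lowers the total error AND passes a one-tailed
          // sign test. binomP gives the probability of observing at most
          // errorsNewNotOld "new-only" errors out of all
          // errorsNewNotOld + errorsOldNotNew disagreements under a fair
          // coin (p = 0.5); the improvement counts as significant when
          // that probability falls below SIGNLOWER (a significance
          // threshold defined elsewhere in this class).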
          if ((tempErrors < tempErrorsBest)
              && (binomP(errorsNewNotOld, errorsNewNotOld + errorsOldNotNew, 0.5)
                  < SIGNLOWER)) {
            // Step 14:
            tempCnt++;
            // --------------------------------------------------
            //tempD_subsetBest = new Indexes(tempSubInstances);
            // --------------------------------------------------
            tempSubInstances.setSequentialDataset(true);
            tempD_subsetBestInsts =
                (int []) tempSubInstances.m_SequentialInstIndexes.clone();
            tempD_subsetBestAtts =
                (int []) tempSubInstances.m_SequentialAttIndexes.clone();
            // --------------------------------------------------
            // Step 15:
            tempErrorsBest = tempErrors;
            tempErrorFlagBest = (boolean []) subLocalErrorFlags.clone();
            // Step 16:
            attributeBest = subAttrIndex;
          } // end of if
        } // end of if
      } // end of main for

      // Step 20:
      if (attributeBest != -1) {
        bestCnt++;
        // Step 21: move the best attribute to the left-hand side of the rule.
        leftHand.add(testInstance.attribute(attributeBest));
        // --------------------------------------------------
        // Step 22:
        //tempD_subsetBest.setAttIndex(attributeBest, false);
        //subInstances = tempD_subsetBest;
        // --------------------------------------------------
        subInstances.setInsts(tempD_subsetBestInsts, true);
        subInstances.setAtts(tempD_subsetBestAtts, true);
        subInstances.setAttIndex(attributeBest, false);
        // --------------------------------------------------
        // Step 25:
        localErrors = tempErrorsBest;
        localErrorFlags = tempErrorFlagBest;
      } else {
        break;
      }
    } // end of while

    // Step 27:
    localNaiveBayes(subInstances);
    return localDistributionForInstance(testInstance, subInstances);
  }

  /**
   * Returns a description of the classifier.
   *
   * @return a description of the classifier as a string
   */
  public String toString() {
    if (m_Instances == null) {
      return "Lazy Bayesian Rule: No model built yet.";
    }
    try {
      StringBuffer text = new StringBuffer("=== LBR Run information ===\n\n");
      text.append("Scheme:     weka.classifiers.LBR\n");
      text.append("Relation:   " + m_Instances.relationName() + "\n");
      text.append("Instances:  " + m_Instances.numInstances() + "\n");
      text.append("Attributes: " + m_Instances.numAttributes() + "\n");
      // The remaining output is printed by Evaluation.java.
      return text.toString();
    } catch (Exception e) {
      e.printStackTrace();
      return "Can't print the Lazy Bayesian Rule classifier!";
    }
  }

  /**
   * Leave-one-out strategy. For a given dataset of n instances, a
   * classifier is built on (n - 1) instances, leaving one out, and tested
   * on the single remaining case. This is repeated n times, once per
   * instance. The returned "error" is the number of instances classified
   * incorrectly.
   *
   * @param instanceIndex index set of instances serving as training data
   * @param counts all the counts of the training data
   * @param priors the number of training instances in each class
   * @param errorFlags receives a per-instance flag: true if the instance
   *        was classified correctly, false otherwise
   * @return the number of instances classified incorrectly
   */
  public int leaveOneOut(Indexes instanceIndex, int [][][] counts,
                         int [] priors, boolean [] errorFlags) throws Exception {
    // ###### START LEAVE ONE OUT #############
    int tempClassValue;
    double posteriors;
    double sumForPriors;
    double sumForCounts;
    double max = 0;
    int maxIndex = 0;
    int AIndex, attIndex, clss;
    int inst;
    int errors = 0;
    int instIndex;
    Instance instance;

    instanceIndex.setSequentialDataset(true);
    int tempInstanceClassValue;
    int [] tempAttributeValues = new int[(int) instanceIndex.m_NumSeqAttsSet + 1];
    Instance tempInstance;

    for (inst = 0; inst < instanceIndex.m_NumSeqInstsSet; inst++) {
      instIndex = instanceIndex.m_SequentialInstIndexes[inst];
      // Get the leave-one-out instance.
      tempInstance = (Instance) m_Instances.instance(instIndex);
      if (!tempInstance.classIsMissing()) {
        tempInstanceClassValue = (int) tempInstance.classValue();

        // Compute the counts and priors for the (n - 1) instances by
        // subtracting the held-out instance from the totals.
        for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
          AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
          tempAttributeValues[attIndex] = (int) tempInstance.value(AIndex);
          counts[tempInstanceClassValue][AIndex][tempAttributeValues[attIndex]]--;
        }
        priors[tempInstanceClassValue]--;

        max = 0;
        maxIndex = 0;

        // ###### LOCAL CLASSIFY INSTANCE ###########
        sumForPriors = Utils.sum(priors);
        for (clss = 0; clss < m_numClasses; clss++) {
          posteriors = (priors[clss] + 1) / (sumForPriors + m_numClasses);
          for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
            AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
            if (!tempInstance.isMissing(AIndex)) {
              sumForCounts = Utils.sum(counts[clss][AIndex]);
              posteriors *= ((counts[clss][AIndex][tempAttributeValues[attIndex]] + 1)
                  / (sumForCounts + (double) tempInstance.attribute(AIndex).numValues()));
            }
          }
          if (posteriors > max) {
            maxIndex = clss;
            max = posteriors;
          }
        } // end of for

        if (max > 0) {
          tempClassValue = maxIndex;
        } else {
          tempClassValue = (int) tempInstance.missingValue();
        }
        // ###### END LOCAL CLASSIFY INSTANCE ###########

        // Record whether the prediction was correct. Note that using
        // classIndex() here would be incorrect: it is the index of the
        // class attribute, not its value.
        if (tempClassValue == tempInstanceClassValue) {
          errorFlags[instIndex] = true;
        } else {
          errorFlags[instIndex] = false;
          errors++;
        }

        // Restore the counts and priors for the held-out instance.
        for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
          AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
          counts[tempInstanceClassValue][AIndex][tempAttributeValues[attIndex]]++;
        }
        priors[tempInstanceClassValue]++;
      }
    } // end of for
    // ###### END LEAVE ONE OUT #############
    return errors;
  }
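  /*
   * Note: the local classification inside leaveOneOut() above and
   * localDistributionForInstance() below both score a class c with the
   * same Laplace-smoothed naive Bayes estimate over m_Counts/m_Priors:
   *
   *   P(c | x)  is proportional to
   *       (priors[c] + 1) / (sum(priors) + numClasses)
   *     * product over attributes a of
   *       (counts[c][a][x_a] + 1) / (sum_v counts[c][a][v] + numValues(a))
   *
   * Attributes whose value is missing in x are skipped in the product.
   */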
  /**
   * Builds the counts and priors for a simple Naive Bayes classifier.
   * For more information, see<p>
   *
   * Richard Duda and Peter Hart (1973). <i>Pattern Classification and
   * Scene Analysis</i>. Wiley, New York.
   *
   * This method only computes m_Counts and m_Priors.
   *
   * @param instanceIndex index set of instances serving as training data
   * @exception Exception if m_Counts and m_Priors have not been
   *            generated successfully
   */
  public void localNaiveBayes(Indexes instanceIndex) throws Exception {
    int attIndex = 0;
    int i, v, AIndex;
    int attVal = 0;
    int classVal = 0;
    Instance instance;

    instanceIndex.setSequentialDataset(true);

    // Reset the local counts.
    for (classVal = 0; classVal < m_numClasses; classVal++) {
      for (attIndex = 0; attIndex < m_numAtts; attIndex++) {
        Attribute attribute = m_Instances.attribute(attIndex);
        for (attVal = 0; attVal < attribute.numValues(); attVal++) {
          m_Counts[classVal][attIndex][attVal] = 0;
        }
      }
      m_Priors[classVal] = 0;
    }

    // Accumulate counts and priors over the selected instances and attributes.
    for (i = 0; i < instanceIndex.m_NumSeqInstsSet; i++) {
      instance = (Instance) m_Instances.instance(instanceIndex.m_SequentialInstIndexes[i]);
      for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
        AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
        m_Counts[(int) instance.classValue()][AIndex][(int) instance.value(AIndex)]++;
      }
      m_Priors[(int) instance.classValue()]++;
    }
  }

  /**
   * Calculates the class membership probabilities for the given test
   * instance.
   *
   * @param instance the instance to be classified
   * @param instanceIndex index set of the instances and attributes to use
   * @return the predicted class probability distribution
   * @exception Exception if the distribution can't be computed
   */
  public double[] localDistributionForInstance(Instance instance,
                                               Indexes instanceIndex) throws Exception {
    double sumForPriors = 0;
    double sumForCounts = 0;
    int attIndex, AIndex;
    int numClassesOfInstance = instance.numClasses();

    instanceIndex.setSequentialDataset(true);

    // Calculate all of the conditional probabilities.
    sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance;
    for (int j = 0; j < numClassesOfInstance; j++) {
      posteriorsArray[j] = (m_Priors[j] + 1) / sumForPriors;
      for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
        AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
        sumForCounts = Utils.sum(m_Counts[j][AIndex]);
        if (!instance.isMissing(AIndex)) {
          posteriorsArray[j] *= ((m_Counts[j][AIndex][(int) instance.value(AIndex)] + 1)
              / (sumForCounts + (double) instance.attribute(AIndex).numValues()));
        }
      }
    }

    // Normalize the probabilities.
    Utils.normalize(posteriorsArray);
    return posteriorsArray;
  }

  /**
   * Significance test (binomP): returns the probability of obtaining r
   * or fewer successes out of n trials if the probability of a success
   * is p.
   *
   * @param r the number of successes
   * @param n the number of trials
   * @param p the probability of a success
   * @return the lower tail of the binomial distribution
   */
  public double binomP(double r, double n, double p) throws Exception {
    if (n == r) {
      return 1.0;
    }
    return Statistics.incompleteBeta(n - r, r + 1.0, 1.0 - p);
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the options
   */
  public static void main(String [] argv) {
    Classifier scheme;

    try {
      scheme = new LBR();
      System.out.println(Evaluation.evaluateModel(scheme, argv));
    } catch (Exception e) {
      System.err.println(e.getMessage());
    }
  }
}
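/*
 * Usage sketch (not part of the original source): exercises LBR through
 * Weka's standard evaluation harness, exactly as main() above does with
 * command-line options, and sanity-checks the binomial sign test. The
 * ARFF file name is a hypothetical placeholder for any dataset with
 * nominal attributes.
 */
class LBRDemo {

  public static void main(String [] args) throws Exception {
    // Sanity check of the sign test used at Step 13: the probability of
    // at most 1 head in 10 fair coin flips is 11/1024, roughly 0.0107.
    System.out.println("binomP(1, 10, 0.5) = " + new LBR().binomP(1, 10, 0.5));

    // Train and test on a nominal dataset; equivalent to running:
    //   java LBR -t weather.nominal.arff
    String [] options = { "-t", "weather.nominal.arff" };
    System.out.println(Evaluation.evaluateModel(new LBR(), options));
  }
}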