📄 osdlcore.java
字号:
* @return an array of doubles representing the predicted * probability distribution over the class labels */ public double[] distributionForInstance(Instance instance) { if (m_tuneInterpolationParameter && !m_interpolationParameterValid) { tuneInterpolationParameter(); } if (!m_balanced) { return distributionForInstance(instance, m_s).toArray(); } // balanced variant return distributionForInstanceBalanced(instance, m_s).toArray(); } /** * Calculates the cumulative class probabilities for the given test * instance. Uses the current settings of the parameters if these are * valid. If necessary it updates the interpolationparameter first, * and hence this may change the classifier. * * @param instance the instance to be classified * @return an array of doubles representing the predicted * cumulative probability distribution over the class labels */ public double[] cumulativeDistributionForInstance(Instance instance) { if (m_tuneInterpolationParameter && !m_interpolationParameterValid) { tuneInterpolationParameter(); } if (!m_balanced) { return cumulativeDistributionForInstance(instance, m_s).toArray(); } return cumulativeDistributionForInstanceBalanced(instance, m_s).toArray(); } /** * Calculates the class probabilities for the given test instance. * Uses the interpolation parameter from the parameterlist, and * always performs the ordinary or weighted OSDL algorithm, * according to the current settings of the classifier. * This method doesn't change the classifier. * * @param instance the instance to classify * @param s value of the interpolationparameter to use * @return the calculated distribution */ private DiscreteDistribution distributionForInstance(Instance instance, double s) { return new DiscreteDistribution(cumulativeDistributionForInstance(instance, s)); } /** * Calculates the class probabilities for the given test * instance. Uses the interpolationparameter from the parameterlist, and * always performs the balanced OSDL algorithm. 
* This method doesn't change the classifier. * * @param instance the instance to classify * @param s value of the interpolationparameter to use * @return the calculated distribution */ private DiscreteDistribution distributionForInstanceBalanced( Instance instance, double s) { return new DiscreteDistribution(cumulativeDistributionForInstanceBalanced(instance,s)); } /** * Calculates the cumulative class probabilities for the given test * instance. Uses the interpolationparameter from the parameterlist, and * always performs the ordinary or weighted OSDL algorithm, * according to the current settings of the classifier. * This method doesn't change the classifier. * * @param instance the instance to classify * @param s value of the interpolationparameter to use * @return the calculated distribution */ private CumulativeDiscreteDistribution cumulativeDistributionForInstance( Instance instance, double s) { Coordinates xc = new Coordinates(instance); int n = instance.numClasses(); int nrSmaller = 0; int nrGreater = 0; if (!containsSmallestElement()) { // corresponds to adding the minimal element to the data space nrSmaller = 1; // avoid division by zero } if (!containsBiggestElement()) { // corresponds to adding the maximal element to the data space nrGreater = 1; // avoid division by zero } // Create fMin and fMax CumulativeDiscreteDistribution fMin = DistributionUtils.getMinimalCumulativeDiscreteDistribution(n); CumulativeDiscreteDistribution fMax = DistributionUtils.getMaximalCumulativeDiscreteDistribution(n); // Cycle through all the map of cumulative distribution functions for (Iterator i = m_estimatedCumulativeDistributions.keySet().iterator(); i.hasNext(); ) { Coordinates yc = (Coordinates) i.next(); CumulativeDiscreteDistribution cdf = (CumulativeDiscreteDistribution) m_estimatedCumulativeDistributions.get(yc); if (yc.equals(xc)) { nrSmaller++; fMin = DistributionUtils.takeMin(fMin,cdf); nrGreater++; fMax = DistributionUtils.takeMax(fMax,cdf); } else if 
(yc.strictlySmaller(xc)) { nrSmaller++; fMin = DistributionUtils.takeMin(fMin,cdf); } else if (xc.strictlySmaller(yc)) { nrGreater++; fMax = DistributionUtils.takeMax(fMax,cdf); } } if (m_weighted) { s = ( (double) nrSmaller) / (nrSmaller + nrGreater); if (m_Debug) { System.err.println("Weighted OSDL: interpolation parameter" + " is s = " + s); } } // calculate s*fMin + (1-s)*fMax return DistributionUtils.interpolate(fMin, fMax, 1 - s); } /** * @return true if the learning examples contain an element for which * the coordinates are the minimal element of the data space, false * otherwise */ private boolean containsSmallestElement() { return m_estimatedCumulativeDistributions.containsKey(smallestElement); } /** * @return true if the learning examples contain an element for which * the coordinates are the maximal element of the data space, false * otherwise */ private boolean containsBiggestElement() { return m_estimatedCumulativeDistributions.containsKey(biggestElement); } /** * Calculates the cumulative class probabilities for the given test * instance. Uses the interpolationparameter from the parameterlist, and * always performs the single or double balanced OSDL algorithm. * This method doesn't change the classifier. 
 *
 * @param instance the instance to classify
 * @param s value of the interpolationparameter to use
 * @return the calculated distribution
 */
private CumulativeDiscreteDistribution cumulativeDistributionForInstanceBalanced(
    Instance instance, double s) {

  Coordinates xc = new Coordinates(instance);
  int n = instance.numClasses();

  // n_m[i] represents the number of examples smaller or equal
  // than xc and with a class label strictly greater than i
  int[] n_m = new int[n];

  // n_M[i] represents the number of examples greater or equal
  // than xc and with a class label smaller or equal than i
  int[] n_M = new int[n];

  // Create fMin and fMax (the lower/upper envelope distributions)
  CumulativeDiscreteDistribution fMin =
    DistributionUtils.getMinimalCumulativeDiscreteDistribution(n);
  CumulativeDiscreteDistribution fMax =
    DistributionUtils.getMaximalCumulativeDiscreteDistribution(n);

  // Cycle through all the map of cumulative distribution functions
  for (Iterator i = m_estimatedCumulativeDistributions.keySet().iterator();
      i.hasNext(); ) {
    Coordinates yc = (Coordinates) i.next();
    CumulativeDiscreteDistribution cdf =
      (CumulativeDiscreteDistribution)
      m_estimatedCumulativeDistributions.get(yc);

    if (yc.equals(xc)) {
      // xc itself: contributes to both count arrays and both envelopes.
      // update n_m and n_M
      DiscreteEstimator df =
        (DiscreteEstimator) m_estimatedDistributions.get(yc);
      updateN_m(n_m,df);
      updateN_M(n_M,df);
      fMin = DistributionUtils.takeMin(fMin,cdf);
      fMax = DistributionUtils.takeMax(fMax,cdf);
    } else if (yc.strictlySmaller(xc)) {
      // strictly smaller example: update n_m and the lower envelope
      DiscreteEstimator df =
        (DiscreteEstimator) m_estimatedDistributions.get(yc);
      updateN_m(n_m, df);
      fMin = DistributionUtils.takeMin(fMin,cdf);
    } else if (xc.strictlySmaller(yc)) {
      // strictly greater example: update n_M and the upper envelope
      DiscreteEstimator df =
        (DiscreteEstimator) m_estimatedDistributions.get(yc);
      updateN_M(n_M, df);
      fMax = DistributionUtils.takeMax(fMax,cdf);
    }
    // incomparable examples are ignored
  }

  double[] dd = new double[n];

  // for each label decide what formula to use, either using
  // n_m[i] and n_M[i] (if fMin[i]<fMax[i]) or using the
  // interpolationparameter s or using the double balanced version
  for (int i = 0; i < n; i++) {
    double fmin = fMin.getCumulativeProbability(i);
    double fmax = fMax.getCumulativeProbability(i);

    if (m_weighted == true) {
      // double balanced version
      if (fmin < fmax) { // reversed preference
        // NOTE(review): if n_m[i] + n_M[i] == 0 here, this divides by
        // zero and yields NaN -- only the branch below guards against
        // a zero denominator. TODO confirm this case cannot occur.
        dd[i] = (n_m[i] * fmin + n_M[i] * fmax) / (n_m[i] + n_M[i]);
      } else {
        if (n_m[i] + n_M[i] == 0) {
          // avoid division by zero: fall back to plain interpolation
          dd[i] = s * fmin + (1 - s) * fmax;
        } else {
          dd[i] = (n_M[i] * fmin + n_m[i] * fmax) / (n_m[i] + n_M[i]) ;
        }
      }
    } else {
      // singly balanced version
      // NOTE(review): same potential 0/0 in the first alternative as
      // above when fmin < fmax and both counts are zero -- verify.
      dd[i] = (fmin < fmax)
        ? (n_m[i] * fmin + n_M[i] * fmax) / (n_m[i] + n_M[i])
        : s * fmin + (1 - s) * fmax;
    }
  }

  try {
    return new CumulativeDiscreteDistribution(dd);
  } catch (IllegalArgumentException e) {
    // this shouldn't happen: dd should already be a valid cumulative
    // distribution. Dump the offending array before failing hard.
    System.err.println("We tried to create a cumulative "
        + "discrete distribution from the following array");
    for (int i = 0; i < dd.length; i++) {
      System.err.print(dd[i] + " ");
    }
    System.err.println();
    throw new AssertionError(dd);
  }
}

/**
 * Update the array n_m using the given <code> DiscreteEstimator </code>.
 *
 * @param n_m the array n_m that will be updated.
 * @param de the <code> DiscreteEstimator </code> that gives the
 * count over the different class labels.
 */
private void updateN_m(int[] n_m, DiscreteEstimator de) {
  int[] tmp = new int[n_m.length];

  // all examples have a class labels strictly greater
  // than 0, except those that have class label 0.
  tmp[0] = (int) de.getSumOfCounts() - (int) de.getCount(0);
  n_m[0] += tmp[0];

  for (int i = 1; i < n_m.length; i++) {
    // the examples with a class label strictly greater
    // than i are exactly those that have a class label strictly
    // greater than i-1, except those that have class label i.
    tmp[i] = tmp[i - 1] - (int) de.getCount(i);
    n_m[i] += tmp[i];
  }

  if (n_m[n_m.length - 1] != 0) {
    // this shouldn't happen: no class label is strictly greater than
    // the maximum label, so the last count must always be zero.
    System.err.println("******** Problem with n_m in "
        + m_train.relationName());
    System.err.println("Last argument is non-zero, namely : "
        + n_m[n_m.length - 1]);
  }
}

/**
 * Update the array n_M using the given <code> DiscreteEstimator </code>.
 *
 * @param n_M the array n_M that will be updated.
 * @param de the <code> DiscreteEstimator </code> that gives the
 * count over the different class labels.
 */
private void updateN_M(int[] n_M, DiscreteEstimator de) {
  int n = n_M.length;
  int[] tmp = new int[n];

  // all examples have a class label smaller or equal
  // than n-1 (which is the maximum class label)
  tmp[n - 1] = (int) de.getSumOfCounts();
  n_M[n - 1] += tmp[n - 1];

  for (int i = n - 2; i >= 0; i--) {
    // the examples with a class label smaller or equal
    // than i are exactly those that have a class label
    // smaller or equal than i+1, except those that have
    // class label i+1.
    tmp[i] = tmp[i + 1] - (int) de.getCount(i + 1);
    n_M[i] += tmp[i];
  }
}

/**
 * Builds the classifier.
 * This means that all relevant examples are stored into memory.
 * If necessary the interpolation parameter is tuned.
* * @param instances the instances to be used for building the classifier * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { getCapabilities().testWithFail(instances); // copy the dataset m_train = new Instances(instances); // new dataset in which examples with missing class value are removed m_train.deleteWithMissingClass(); // build the Map for the estimatedDistributions m_estimatedDistributions = new HashMap(m_train.numInstances()/2); // cycle through all instances for (Iterator it = new EnumerationIterator(instances.enumerateInstances()); it.hasNext();) { Instance instance = (Instance) it.next(); Coordinates c = new Coordinates(instance); // get DiscreteEstimator from the map DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions.get(c); // if no DiscreteEstimator is present in the map, create one if (df == null) { df = new DiscreteEstimator(instances.numClasses(),0); } df.addValue(instance.classValue(),instance.weight()); // update m_estimatedDistributions.put(c,df); // put back in map } // build the map of cumulative distribution functions m_estimatedCumulativeDistributions = new HashMap(m_estimatedDistributions.size()/2); // Cycle trough the map of discrete distributions, and create a new // one containing cumulative discrete distributions for (Iterator it=m_estimatedDistributions.keySet().iterator(); it.hasNext();) { Coordinates c = (Coordinates) it.next(); DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions.get(c); m_estimatedCumulativeDistributions.put (c, new CumulativeDiscreteDistribution(df)); } // check if the interpolation parameter needs to be tuned if (m_tuneInterpolationParameter && !m_interpolationParameterValid) { tuneInterpolationParameter(); } // fill in the smallest and biggest element (for use in the // quasi monotone version of the algorithm) double[] tmpAttValues = new double[instances.numAttributes()]; Instance instance = new 
Instance(1, tmpAttValues); instance.setDataset(instances); smallestElement = new Coordinates(instance); if (m_Debug) { System.err.println("minimal element of data space = " + smallestElement); } for (int i = 0; i < tmpAttValues.length; i++) { tmpAttValues[i] = instances.attribute(i).numValues() - 1; } instance = new Instance(1, tmpAttValues); instance.setDataset(instances); biggestElement = new Coordinates(instance); if (m_Debug) { System.err.println("maximal element of data space = " + biggestElement); } } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String classificationTypeTipText() { return "Sets the way in which a single label will be extracted " + "from the estimated distribution."; } /** * Sets the classification type. Currently <code> ctype </code> * must be one of: * <ul> * <li> <code> CT_REGRESSION </code> : use expectation value of * distribution. (Non-ordinal in nature). * <li> <code> CT_WEIGHTED_SUM </code> : use expectation value of * distribution rounded to nearest class label. (Non-ordinal in * nature). * <li> <code> CT_MAXPROB </code> : use the mode of the distribution. * (May deliver non-monotone results). * <li> <code> CT_MEDIAN </code> : use the median of the distribution * (rounded to the nearest class label). * <li> <code> CT_MEDIAN_REAL </code> : use the median of the distribution * but not rounded to the nearest class label. * </ul> * * @param value the classification type
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -