📄 osdlcore.java
字号:
* @return an array of doubles representing the predicted * probability distribution over the class labels */ public double[] distributionForInstance(Instance instance) { if (m_tuneInterpolationParameter && !m_interpolationParameterValid) { tuneInterpolationParameter(); } if (!m_balanced) { return distributionForInstance(instance, m_s).toArray(); } // balanced variant return distributionForInstanceBalanced(instance, m_s).toArray(); } /** * Calculates the cumulative class probabilities for the given test * instance. Uses the current settings of the parameters if these are * valid. If necessary it updates the interpolationparameter first, * and hence this may change the classifier. * * @param instance the instance to be classified * @return an array of doubles representing the predicted * cumulative probability distribution over the class labels */ public double[] cumulativeDistributionForInstance(Instance instance) { if (m_tuneInterpolationParameter && !m_interpolationParameterValid) { tuneInterpolationParameter(); } if (!m_balanced) { return cumulativeDistributionForInstance(instance, m_s).toArray(); } return cumulativeDistributionForInstanceBalanced(instance, m_s).toArray(); } /** * Calculates the class probabilities for the given test instance. * Uses the interpolation parameter from the parameterlist, and * always performs the ordinary or weighted OSDL algorithm, * according to the current settings of the classifier. * This method doesn't change the classifier. * * @param instance the instance to classify * @param s value of the interpolationparameter to use * @return the calculated distribution */ private DiscreteDistribution distributionForInstance(Instance instance, double s) { return new DiscreteDistribution(cumulativeDistributionForInstance(instance, s)); } /** * Calculates the class probabilities for the given test * instance. Uses the interpolationparameter from the parameterlist, and * always performs the balanced OSDL algorithm. 
* This method doesn't change the classifier. * * @param instance the instance to classify * @param s value of the interpolationparameter to use * @return the calculated distribution */ private DiscreteDistribution distributionForInstanceBalanced( Instance instance, double s) { return new DiscreteDistribution(cumulativeDistributionForInstanceBalanced(instance,s)); } /** * Calculates the cumulative class probabilities for the given test * instance. Uses the interpolationparameter from the parameterlist, and * always performs the ordinary or weighted OSDL algorithm, * according to the current settings of the classifier. * This method doesn't change the classifier. * * @param instance the instance to classify * @param s value of the interpolationparameter to use * @return the calculated distribution */ private CumulativeDiscreteDistribution cumulativeDistributionForInstance( Instance instance, double s) { Coordinates xc = new Coordinates(instance); int n = instance.numClasses(); int nrSmaller = 0; int nrGreater = 0; if (!containsSmallestElement()) { // corresponds to adding the minimal element to the data space nrSmaller = 1; // avoid division by zero } if (!containsBiggestElement()) { // corresponds to adding the maximal element to the data space nrGreater = 1; // avoid division by zero } // Create fMin and fMax CumulativeDiscreteDistribution fMin = DistributionUtils.getMinimalCumulativeDiscreteDistribution(n); CumulativeDiscreteDistribution fMax = DistributionUtils.getMaximalCumulativeDiscreteDistribution(n); // Cycle through all the map of cumulative distribution functions for (Iterator i = m_estimatedCumulativeDistributions.keySet().iterator(); i.hasNext(); ) { Coordinates yc = (Coordinates) i.next(); CumulativeDiscreteDistribution cdf = (CumulativeDiscreteDistribution) m_estimatedCumulativeDistributions.get(yc); if (yc.equals(xc)) { nrSmaller++; fMin = DistributionUtils.takeMin(fMin,cdf); nrGreater++; fMax = DistributionUtils.takeMax(fMax,cdf); } else if 
(yc.strictlySmaller(xc)) { nrSmaller++; fMin = DistributionUtils.takeMin(fMin,cdf); } else if (xc.strictlySmaller(yc)) { nrGreater++; fMax = DistributionUtils.takeMax(fMax,cdf); } } if (m_weighted) { s = ( (double) nrSmaller) / (nrSmaller + nrGreater); if (m_Debug) { System.err.println("Weighted OSDL: interpolation parameter" + " is s = " + s); } } // calculate s*fMin + (1-s)*fMax return DistributionUtils.interpolate(fMin, fMax, 1 - s); } /** * @return true if the learning examples contain an element for which * the coordinates are the minimal element of the data space, false * otherwise */ private boolean containsSmallestElement() { return m_estimatedCumulativeDistributions.containsKey(smallestElement); } /** * @return true if the learning examples contain an element for which * the coordinates are the maximal element of the data space, false * otherwise */ private boolean containsBiggestElement() { return m_estimatedCumulativeDistributions.containsKey(biggestElement); } /** * Calculates the cumulative class probabilities for the given test * instance. Uses the interpolationparameter from the parameterlist, and * always performs the single or double balanced OSDL algorithm. * This method doesn't change the classifier. 
 *
 * @param instance the instance to classify
 * @param s value of the interpolationparameter to use
 * @return the calculated distribution
 */
private CumulativeDiscreteDistribution cumulativeDistributionForInstanceBalanced(
    Instance instance, double s) {

  Coordinates xc = new Coordinates(instance);
  int n = instance.numClasses();

  // n_m[i] represents the number of examples smaller or equal
  // than xc and with a class label strictly greater than i
  int[] n_m = new int[n];

  // n_M[i] represents the number of examples greater or equal
  // than xc and with a class label smaller or equal than i
  int[] n_M = new int[n];

  // Create fMin and fMax (the lower/upper envelope distributions)
  CumulativeDiscreteDistribution fMin =
    DistributionUtils.getMinimalCumulativeDiscreteDistribution(n);
  CumulativeDiscreteDistribution fMax =
    DistributionUtils.getMaximalCumulativeDiscreteDistribution(n);

  // Cycle through all the map of cumulative distribution functions
  for (Iterator i = m_estimatedCumulativeDistributions.keySet().iterator();
      i.hasNext(); ) {
    Coordinates yc = (Coordinates) i.next();
    CumulativeDiscreteDistribution cdf =
      (CumulativeDiscreteDistribution)
      m_estimatedCumulativeDistributions.get(yc);

    if (yc.equals(xc)) {
      // xc itself: contributes to both count arrays and both envelopes.
      // update n_m and n_M
      DiscreteEstimator df =
        (DiscreteEstimator) m_estimatedDistributions.get(yc);
      updateN_m(n_m,df);
      updateN_M(n_M,df);
      fMin = DistributionUtils.takeMin(fMin,cdf);
      fMax = DistributionUtils.takeMax(fMax,cdf);
    } else if (yc.strictlySmaller(xc)) {
      // strictly smaller example: update n_m and the lower envelope
      DiscreteEstimator df =
        (DiscreteEstimator) m_estimatedDistributions.get(yc);
      updateN_m(n_m, df);
      fMin = DistributionUtils.takeMin(fMin,cdf);
    } else if (xc.strictlySmaller(yc)) {
      // strictly greater example: update n_M and the upper envelope
      DiscreteEstimator df =
        (DiscreteEstimator) m_estimatedDistributions.get(yc);
      updateN_M(n_M, df);
      fMax = DistributionUtils.takeMax(fMax,cdf);
    }
    // incomparable examples are ignored
  }

  double[] dd = new double[n];

  // for each label decide what formula to use, either using
  // n_m[i] and n_M[i] (if fMin[i]<fMax[i]) or using the
  // interpolationparameter s or using the double balanced version
  for (int i = 0; i < n; i++) {
    double fmin = fMin.getCumulativeProbability(i);
    double fmax = fMax.getCumulativeProbability(i);

    if (m_weighted == true) {
      // double balanced version
      if (fmin < fmax) { // reversed preference
        // NOTE(review): if n_m[i] + n_M[i] == 0 here, this divides by
        // zero and yields NaN -- only the branch below guards against
        // a zero denominator. TODO confirm this case cannot occur.
        dd[i] = (n_m[i] * fmin + n_M[i] * fmax) / (n_m[i] + n_M[i]);
      } else {
        if (n_m[i] + n_M[i] == 0) {
          // avoid division by zero: fall back to plain interpolation
          dd[i] = s * fmin + (1 - s) * fmax;
        } else {
          dd[i] = (n_M[i] * fmin + n_m[i] * fmax) / (n_m[i] + n_M[i]) ;
        }
      }
    } else {
      // singly balanced version
      // NOTE(review): same potential 0/0 in the first alternative as
      // above when fmin < fmax and both counts are zero -- verify.
      dd[i] = (fmin < fmax)
        ? (n_m[i] * fmin + n_M[i] * fmax) / (n_m[i] + n_M[i])
        : s * fmin + (1 - s) * fmax;
    }
  }

  try {
    return new CumulativeDiscreteDistribution(dd);
  } catch (IllegalArgumentException e) {
    // this shouldn't happen: dd should already be a valid cumulative
    // distribution. Dump the offending array before failing hard.
    System.err.println("We tried to create a cumulative "
        + "discrete distribution from the following array");
    for (int i = 0; i < dd.length; i++) {
      System.err.print(dd[i] + " ");
    }
    System.err.println();
    throw new AssertionError(dd);
  }
}

/**
 * Update the array n_m using the given <code> DiscreteEstimator </code>.
 *
 * @param n_m the array n_m that will be updated.
 * @param de the <code> DiscreteEstimator </code> that gives the
 * count over the different class labels.
 */
private void updateN_m(int[] n_m, DiscreteEstimator de) {
  int[] tmp = new int[n_m.length];

  // all examples have a class labels strictly greater
  // than 0, except those that have class label 0.
  tmp[0] = (int) de.getSumOfCounts() - (int) de.getCount(0);
  n_m[0] += tmp[0];

  for (int i = 1; i < n_m.length; i++) {
    // the examples with a class label strictly greater
    // than i are exactly those that have a class label strictly
    // greater than i-1, except those that have class label i.
    tmp[i] = tmp[i - 1] - (int) de.getCount(i);
    n_m[i] += tmp[i];
  }

  if (n_m[n_m.length - 1] != 0) {
    // this shouldn't happen: no class label is strictly greater than
    // the maximum label, so the last count must always be zero.
    System.err.println("******** Problem with n_m in "
        + m_train.relationName());
    System.err.println("Last argument is non-zero, namely : "
        + n_m[n_m.length - 1]);
  }
}

/**
 * Update the array n_M using the given <code> DiscreteEstimator </code>.
 *
 * @param n_M the array n_M that will be updated.
 * @param de the <code> DiscreteEstimator </code> that gives the
 * count over the different class labels.
 */
private void updateN_M(int[] n_M, DiscreteEstimator de) {
  int n = n_M.length;
  int[] tmp = new int[n];

  // all examples have a class label smaller or equal
  // than n-1 (which is the maximum class label)
  tmp[n - 1] = (int) de.getSumOfCounts();
  n_M[n - 1] += tmp[n - 1];

  for (int i = n - 2; i >= 0; i--) {
    // the examples with a class label smaller or equal
    // than i are exactly those that have a class label
    // smaller or equal than i+1, except those that have
    // class label i+1.
    tmp[i] = tmp[i + 1] - (int) de.getCount(i + 1);
    n_M[i] += tmp[i];
  }
}

/**
 * Builds the classifier.
 * This means that all relevant examples are stored into memory.
 * If necessary the interpolation parameter is tuned.
* * @param instances the instances to be used for building the classifier * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { getCapabilities().testWithFail(instances); // copy the dataset m_train = new Instances(instances); // new dataset in which examples with missing class value are removed m_train.deleteWithMissingClass(); // build the Map for the estimatedDistributions m_estimatedDistributions = new HashMap(m_train.numInstances()/2); // cycle through all instances for (Iterator it = new EnumerationIterator(instances.enumerateInstances()); it.hasNext();) { Instance instance = (Instance) it.next(); Coordinates c = new Coordinates(instance); // get DiscreteEstimator from the map DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions.get(c); // if no DiscreteEstimator is present in the map, create one if (df == null) { df = new DiscreteEstimator(instances.numClasses(),0); } df.addValue(instance.classValue(),instance.weight()); // update m_estimatedDistributions.put(c,df); // put back in map } // build the map of cumulative distribution functions m_estimatedCumulativeDistributions = new HashMap(m_estimatedDistributions.size()/2); // Cycle trough the map of discrete distributions, and create a new // one containing cumulative discrete distributions for (Iterator it=m_estimatedDistributions.keySet().iterator(); it.hasNext();) { Coordinates c = (Coordinates) it.next(); DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions.get(c); m_estimatedCumulativeDistributions.put (c, new CumulativeDiscreteDistribution(df)); } // check if the interpolation parameter needs to be tuned if (m_tuneInterpolationParameter && !m_interpolationParameterValid) { tuneInterpolationParameter(); } // fill in the smallest and biggest element (for use in the // quasi monotone version of the algorithm) double[] tmpAttValues = new double[instances.numAttributes()]; Instance instance = new 
Instance(1, tmpAttValues); instance.setDataset(instances); smallestElement = new Coordinates(instance); if (m_Debug) { System.err.println("minimal element of data space = " + smallestElement); } for (int i = 0; i < tmpAttValues.length; i++) { tmpAttValues[i] = instances.attribute(i).numValues() - 1; } instance = new Instance(1, tmpAttValues); instance.setDataset(instances); biggestElement = new Coordinates(instance); if (m_Debug) { System.err.println("maximal element of data space = " + biggestElement); } } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String classificationTypeTipText() { return "Sets the way in which a single label will be extracted " + "from the estimated distribution."; } /** * Sets the classification type. Currently <code> ctype </code> * must be one of: * <ul> * <li> <code> CT_REGRESSION </code> : use expectation value of * distribution. (Non-ordinal in nature). * <li> <code> CT_WEIGHTED_SUM </code> : use expectation value of * distribution rounded to nearest class label. (Non-ordinal in * nature). * <li> <code> CT_MAXPROB </code> : use the mode of the distribution. * (May deliver non-monotone results). * <li> <code> CT_MEDIAN </code> : use the median of the distribution * (rounded to the nearest class label). * <li> <code> CT_MEDIAN_REAL </code> : use the median of the distribution * but not rounded to the nearest class label. * </ul> * * @param value the classification type
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -