📄 osdlcore.java
字号:
throw new IllegalArgumentException("Length of array is not sufficient"); } if (!interpolationParametersValid(sLow, sUp, sNrParts)) { throw new IllegalArgumentException("Interpolation parameters are not valid"); } if (!classificationTypeValid(ctype)) { throw new IllegalArgumentException("Not a valid classification type " + ctype); } Arrays.fill(performanceStats, 0, sNrParts + 1, 0); // cycle through all instances for (Iterator it = new EnumerationIterator(m_train.enumerateInstances()); it.hasNext(); ) { Instance instance = (Instance) it.next(); double classValue = instance.classValue(); removeInstance(instance); double s = sLow; double step = (sUp - sLow) / sNrParts; //step size for (int i = 0; i <= sNrParts; i++, s += step) { try { performanceStats[i] += lossFunction.loss(classValue, classifyInstance(instance, s, ctype)); } catch (Exception exception) { // XXX what should I do here, normally we shouldn't be here System.err.println(exception.getMessage()); System.exit(1); } } // XXX may be done more efficiently addInstance(instance); // update } // select the 'best' value for s // to this end, we sort the array with the leave-one-out // performance statistics, and we choose the middle one // off all those that score 'best' // new code, august 2004 // new code, june 2005. If performanceStats is longer than // necessary, copy it first double[] tmp = performanceStats; if (performanceStats.length > sNrParts + 1) { tmp = new double[sNrParts + 1]; System.arraycopy(performanceStats, 0, tmp, 0, tmp.length); } int[] sort = Utils.stableSort(tmp); int minIndex = 0; while (minIndex + 1 < tmp.length && tmp[sort[minIndex + 1]] == tmp[sort[minIndex]]) { minIndex++; } minIndex = sort[minIndex / 2]; // middle one // int minIndex = Utils.minIndex(performanceStats); // OLD code return sLow + minIndex * (sUp - sLow) / sNrParts; } /** * Checks if <code> ctype </code> is a valid classification * type. 
* @param ctype the int to be checked * @return true if ctype is a valid classification type, false otherwise */ private boolean classificationTypeValid(int ctype) { return ctype == CT_REGRESSION || ctype == CT_WEIGHTED_SUM || ctype == CT_MAXPROB || ctype == CT_MEDIAN || ctype == CT_MEDIAN_REAL; } /** * Checks if the given parameters are valid interpolation parameters. * @param sLow lower bound for the interval * @param sUp upper bound for the interval * @param sNrParts the number of parts the interval has to be divided in * @return true is the given parameters are valid interpolation parameters, * false otherwise */ private boolean interpolationParametersValid(double sLow, double sUp, int sNrParts) { return sLow >= 0 && sUp <= 1 && sLow < sUp && sNrParts > 0 || sLow == sUp && sNrParts == 0; // special case included } /** * Remove an instance from the classifier. Updates the hashmaps. * @param instance the instance to be removed. */ private void removeInstance(Instance instance) { Coordinates c = new Coordinates(instance); // Remove instance temporarily from the Maps with the distributions DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions.get(c); // remove from df df.addValue(instance.classValue(),-instance.weight()); if (Math.abs(df.getSumOfCounts() - 0) < Utils.SMALL) { /* There was apparently only one example with coordinates c * in the training set, and now we removed it. * Remove the key c from both maps. */ m_estimatedDistributions.remove(c); m_estimatedCumulativeDistributions.remove(c); } else { // update both maps m_estimatedDistributions.put(c,df); m_estimatedCumulativeDistributions.put (c, new CumulativeDiscreteDistribution(df)); } } /** * Update the classifier using the given instance. 
Updates the hashmaps * @param instance the instance to be added */ private void addInstance(Instance instance) { Coordinates c = new Coordinates(instance); // Get DiscreteEstimator from the map DiscreteEstimator df = (DiscreteEstimator) m_estimatedDistributions.get(c); // If no DiscreteEstimator is present in the map, create one if (df == null) { df = new DiscreteEstimator(instance.dataset().numClasses(),0); } df.addValue(instance.classValue(),instance.weight()); // update df m_estimatedDistributions.put(c,df); // put back in map m_estimatedCumulativeDistributions.put (c, new CumulativeDiscreteDistribution(df)); } /** * Returns an enumeration describing the available options. * For a list of available options, see <code> setOptions </code>. * * @return an enumeration of all available options. */ public Enumeration listOptions() { Vector options = new Vector(); Enumeration enm = super.listOptions(); while (enm.hasMoreElements()) options.addElement(enm.nextElement()); String description = "\tSets the classification type to be used.\n" + "\t(Default: " + new SelectedTag(CT_MEDIAN, TAGS_CLASSIFICATIONTYPES) + ")"; String synopsis = "-C " + Tag.toOptionList(TAGS_CLASSIFICATIONTYPES); String name = "C"; options.addElement(new Option(description, name, 1, synopsis)); description = "\tUse the balanced version of the " + "Ordinal Stochastic Dominance Learner"; synopsis = "-B"; name = "B"; options.addElement(new Option(description, name, 1, synopsis)); description = "\tUse the weighted version of the " + "Ordinal Stochastic Dominance Learner"; synopsis = "-W"; name = "W"; options.addElement(new Option(description, name, 1, synopsis)); description = "\tSets the value of the interpolation parameter (not with -W/T/P/L/U)\n" + "\t(default: 0.5)."; synopsis = "-S <value of interpolation parameter>"; name = "S"; options.addElement(new Option(description, name, 1, synopsis)); description = "\tTune the interpolation parameter (not with -W/S)\n" + "\t(default: off)"; synopsis = "-T"; 
name = "T"; options.addElement(new Option(description, name, 0, synopsis)); description = "\tLower bound for the interpolation parameter (not with -W/S)\n" + "\t(default: 0)"; synopsis = "-L <Lower bound for interpolation parameter>"; name="L"; options.addElement(new Option(description, name, 1, synopsis)); description = "\tUpper bound for the interpolation parameter (not with -W/S)\n" + "\t(default: 1)"; synopsis = "-U <Upper bound for interpolation parameter>"; name="U"; options.addElement(new Option(description, name, 1, synopsis)); description = "\tDetermines the step size for tuning the interpolation\n" + "\tparameter, nl. (U-L)/P (not with -W/S)\n" + "\t(default: 10)"; synopsis = "-P <Number of parts>"; name="P"; options.addElement(new Option(description, name, 1, synopsis)); return options.elements(); } /** * Parses the options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -C <REG|WSUM|MAX|MED|RMED> * Sets the classification type to be used. * (Default: MED)</pre> * * <pre> -B * Use the balanced version of the Ordinal Stochastic Dominance Learner</pre> * * <pre> -W * Use the weighted version of the Ordinal Stochastic Dominance Learner</pre> * * <pre> -S <value of interpolation parameter> * Sets the value of the interpolation parameter (not with -W/T/P/L/U) * (default: 0.5).</pre> * * <pre> -T * Tune the interpolation parameter (not with -W/S) * (default: off)</pre> * * <pre> -L <Lower bound for interpolation parameter> * Lower bound for the interpolation parameter (not with -W/S) * (default: 0)</pre> * * <pre> -U <Upper bound for interpolation parameter> * Upper bound for the interpolation parameter (not with -W/S) * (default: 1)</pre> * * <pre> -P <Number of parts> * Determines the step size for tuning the interpolation * parameter, nl. 
(U-L)/P (not with -W/S) * (default: 10)</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String args; args = Utils.getOption('C',options); if (args.length() != 0) setClassificationType(new SelectedTag(args, TAGS_CLASSIFICATIONTYPES)); else setClassificationType(new SelectedTag(CT_MEDIAN, TAGS_CLASSIFICATIONTYPES)); setBalanced(Utils.getFlag('B',options)); if (Utils.getFlag('W', options)) { m_weighted = true; // ignore any T, S, P, L and U options Utils.getOption('T', options); Utils.getOption('S', options); Utils.getOption('P', options); Utils.getOption('L', options); Utils.getOption('U', options); } else { m_tuneInterpolationParameter = Utils.getFlag('T', options); if (!m_tuneInterpolationParameter) { // ignore P, L, U Utils.getOption('P', options); Utils.getOption('L', options); Utils.getOption('U', options); // value of s args = Utils.getOption('S',options); if (args.length() != 0) setInterpolationParameter(Double.parseDouble(args)); else setInterpolationParameter(0.5); } else { // ignore S Utils.getOption('S', options); args = Utils.getOption('L',options); double l = m_sLower; if (args.length() != 0) l = Double.parseDouble(args); else l = 0.0; args = Utils.getOption('U',options); double u = m_sUpper; if (args.length() != 0) u = Double.parseDouble(args); else u = 1.0; if (m_tuneInterpolationParameter) setInterpolationParameterBounds(l, u); args = Utils.getOption('P',options); if (args.length() != 0) setNumberOfPartsForInterpolationParameter(Integer.parseInt(args)); else setNumberOfPartsForInterpolationParameter(10); } } super.setOptions(options); } /** * Gets the current settings of the OSDLCore classifier. 
*
   * @return an array of strings suitable for passing
   * to <code> setOptions </code>
   */
  public String[] getOptions() {
    Vector collected = new Vector();

    // options of the superclass come first
    String[] inherited = super.getOptions();
    collected.addAll(Arrays.asList(inherited));

    // classification type
    collected.add("-C");
    collected.add("" + getClassificationType());

    if (m_balanced) {
      collected.add("-B");
    }

    if (m_weighted) {
      collected.add("-W");
    } else if (m_tuneInterpolationParameter) {
      // tuning is on: report the bounds and the number of parts
      collected.add("-T");
      collected.add("-L");
      collected.add(Double.toString(m_sLower));
      collected.add("-U");
      collected.add(Double.toString(m_sUpper));
      collected.add("-P");
      collected.add(Integer.toString(m_sNrParts));
    } else {
      // fixed interpolation parameter
      collected.add("-S");
      collected.add(Double.toString(m_s));
    }

    return (String[]) collected.toArray(new String[collected.size()]);
  }

  /**
   * Returns a description of the classifier.
   * Attention: if debugging is on, the description can become
   * very lengthy, because every stored cumulative distribution
   * is printed on a line of its own.
   *
   * @return a string containing the description
   */
  public String toString() {
    StringBuffer text = new StringBuffer();

    // balanced or ordinary OSDL
    text.append(m_balanced
      ? "Balanced OSDL\n=============\n\n"
      : "Ordinary OSDL\n=============\n\n");

    if (m_weighted) {
      text.append("Weighted variant\n");
    }

    // classification type used
    text.append("Classification type: ")
      .append(getClassificationType())
      .append("\n");

    // parameter s (only reported for the non-weighted variant)
    if (!m_weighted) {
      text.append("Interpolation parameter: ").append(m_s).append("\n");
      if (m_tuneInterpolationParameter) {
        text.append("Bounds and stepsize: ")
          .append(m_sLower).append(" ")
          .append(m_sUpper).append(" ")
          .append(m_sNrParts).append("\n");
        if (!m_interpolationParameterValid) {
          text.append("Interpolation parameter is not valid");
        }
      }
    }

    // In debug mode, print each cumulative distribution function in the map.
    if (m_Debug && m_estimatedCumulativeDistributions != null) {
      Iterator keys = m_estimatedCumulativeDistributions.keySet().iterator();
      while (keys.hasNext()) {
        Coordinates yc = (Coordinates) keys.next();
        CumulativeDiscreteDistribution cdf =
          (CumulativeDiscreteDistribution)
            m_estimatedCumulativeDistributions.get(yc);
        text.append("[").append(yc.hashCode()).append("] ")
          .append(yc.toString())
          .append(" --> ")
          .append(cdf.toString())
          .append("\n");
      }
    }

    return text.toString();
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -