📄 tools.java
字号:
/* * YALE - Yet Another Learning Environment * Copyright (C) 2002, 2003 * Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, * Katharina Morik, Oliver Ritthoff * Artificial Intelligence Unit * Computer Science Department * University of Dortmund * 44221 Dortmund, Germany * email: yale@ls8.cs.uni-dortmund.de * web: http://yale.cs.uni-dortmund.de/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. */package edu.udo.cs.yale.example;import edu.udo.cs.yale.operator.OperatorException;import edu.udo.cs.yale.operator.FatalException;import edu.udo.cs.yale.operator.UserError;import edu.udo.cs.yale.tools.Ontology;import edu.udo.cs.yale.tools.RandomGenerator;import edu.udo.cs.yale.generator.FeatureGenerator;import java.util.List;import java.util.LinkedList;import java.util.Iterator;/** Provides some tools for calculation of certain measures and feature generation. * * @version $Id: Tools.java,v 2.4 2003/06/18 17:11:17 fischer Exp $ */public class Tools { /** Returns the Attribute with the best information gain. * * @param ratioGain if true, the ratio gain criterion is used */ public static Attribute getMostInformativeAttribute(ExampleSet exampleSet, boolean ratioGain) throws OperatorException { if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(exampleSet.getLabel().getValueType(), Ontology.NOMINAL)) { throw new UserError(null, 101, new Object[] { "information gain", exampleSet.getLabel().getName() }); } Attribute bestAttribute = null; double bestValue = Double.NEGATIVE_INFINITY; double entropy = getEntropy(exampleSet); for (int i = 0; i < exampleSet.getNumberOfAttributes(); i++) { Attribute attribute = exampleSet.getAttribute(i); double informationGain = getInformationGain(exampleSet, attribute, entropy, ratioGain); if (informationGain > bestValue) { bestValue = informationGain; bestAttribute = attribute; } } return bestAttribute; } public static double getInformationGain(ExampleSet exampleSet, Attribute attribute, boolean ratioGain) { return getInformationGain(exampleSet, attribute, getEntropy(exampleSet), ratioGain); } /** Returns the information gain for one Attribute <tt>i</tt>. Uses ration gain. */ public static double getInformationGain(ExampleSet exampleSet, Attribute attribute, double entropy, boolean ratioGain) { // infoX berechnen int size = exampleSet.getSize(); double infoX = 0.0; double splitInfoX = 0.0; SplittedExampleSet splittedES = null; if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NOMINAL)) { splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute); } else { double threshold = getThreshold(exampleSet, attribute); splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute, threshold); } for (int n = 0; n < splittedES.getNumberOfSubsets(); n++) { splittedES.selectSingleSubset(n); infoX += ((double)splittedES.getSize() / (double)size) * entropy; if (ratioGain) { double split = (double)splittedES.getSize() / (double)size; double splitLogarithm = Math.log(split) / Math.log(2); splitInfoX = split * splitLogarithm; } } // information gain fuer das Attribut berechnen. double informationGain = entropy - infoX; // eventuell gain ratio statt information gain berechnen. if (ratioGain) informationGain = informationGain / -splitInfoX; return informationGain; } /** Returns the information gain value for all attributes as an array. */ public static double[] getInformationGain(ExampleSet exampleSet, boolean ratioGain) throws OperatorException { if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(exampleSet.getLabel().getValueType(), Ontology.NOMINAL)) { throw new UserError(null, 101, new Object[] {"information gain", exampleSet.getLabel().getName()}); } double[] result = new double[exampleSet.getNumberOfAttributes()]; double entropy = getEntropy(exampleSet); for (int i = 0; i < exampleSet.getNumberOfAttributes(); i++) result[i] = getInformationGain(exampleSet, exampleSet.getAttribute(i), entropy, ratioGain); return result; } /** Returns the entropy of the example sets. */ public static double getEntropy(ExampleSet exampleSet) { // Anzahl der Elemente in den Klassen bestimmen int[] classes = new int[exampleSet.getLabel().getNumberOfClasses()]; ExampleReader i = exampleSet.getExampleReader(); while (i.hasNext()) { int currentLabel = (int)i.next().getLabel() - 1; classes[currentLabel]++; } // Entropie berechnen double result = 0.0; for (int n = 0; n < classes.length; n++) { if (classes[n] != 0) { double prob = (double)classes[n] / (double)exampleSet.getSize(); double logarithm = Math.log(prob) / Math.log(2); result += prob * logarithm; } } return (- result); } /** Returns the best threshold for the given attribute so that the subsets have the highest entropy. The attribute must be continuous. */ public static double getThreshold(ExampleSet exampleSet, Attribute attribute) { double bestThreshold = Double.NaN; double bestInfoGainSum = Double.POSITIVE_INFINITY; double[] values = new double[exampleSet.getSize()]; ExampleReader reader = exampleSet.getExampleReader(); int i = 0; while (reader.hasNext()) values[i++] = reader.next().getValue(attribute); for (int n = 0; n < values.length; n++) { double threshold = values[n]; SplittedExampleSet splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute, threshold); splittedES.selectSingleSubset(0); double infoGainSum = getEntropy(splittedES); splittedES.selectSingleSubset(1); infoGainSum += getEntropy(splittedES); if (infoGainSum < bestInfoGainSum) { bestInfoGainSum = infoGainSum; bestThreshold = threshold; } } return bestThreshold; } // -------------------- GENERATION -------------------- /** Sets the number of argument combinations for each feature generator and returns * the sum of combinations. */ public static int setArgumentCombinations(ExampleSet exampleSet, List generators) { int combinationSum = 0; Iterator i = generators.listIterator(); while (i.hasNext()) { FeatureGenerator g = (FeatureGenerator)i.next(); int c = numberOfApplicableArgumentCombinations(exampleSet, g); combinationSum += c; if (c > 0) { g.setArgumentCombinations(c); } } return combinationSum; } /** Returns the number of possible attribute combinations for this generator. */ public static int numberOfApplicableArgumentCombinations(ExampleSet exampleSet, FeatureGenerator generator) { Attribute[] input = generator.getInputAttributes(); int numberOfCombinations = 1; for (int i = 0; i < input.length; i++) { Attribute[] ca = getCompatibleAttributes(exampleSet, input[i]); if (ca == null) return 0; numberOfCombinations *= ca.length; } return numberOfCombinations; } /** Returns all attributes which are compatible with <tt>a</tt> (more * specific). For each value series block the start index is used. If no compatible * attributes are found, returns null. */ public static Attribute[] getCompatibleAttributes(ExampleSet exampleSet, Attribute a) { List attributes = new LinkedList(); for (int i = 0; i < exampleSet.getNumberOfAttributes(); i++) { Attribute a1 = exampleSet.getAttribute(i); // check for compatibility... if (a1.compatible(a)) { // ...and add attribute index if ok attributes.add(a1); // skip the rest of the block if value series if (a1.isSeries()) { i = exampleSet.getBlockEndIndex(i); } } } if (attributes.size() == 0) return null; Attribute[] result = new Attribute[attributes.size()]; attributes.toArray(result); return result; } public static Attribute[] getRandomCompatibleAttributes(ExampleSet exampleSet, Attribute[] expectedInputAttributes) { Attribute [] arguments = new Attribute [expectedInputAttributes.length]; for (int j = 0 ; j < expectedInputAttributes.length ; j++){ Attribute attribute = expectedInputAttributes[j]; Attribute[] compatibleAttributes = getCompatibleAttributes(exampleSet, attribute); arguments[j] = compatibleAttributes[RandomGenerator.getGlobalRandomGenerator().nextInt(compatibleAttributes.length)]; } return arguments; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -