📄 tools.java
字号:
/*
* YALE - Yet Another Learning Environment
* Copyright (C) 2001-2004
* Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
* Katharina Morik, Oliver Ritthoff
* Artificial Intelligence Unit
* Computer Science Department
* University of Dortmund
* 44221 Dortmund, Germany
* email: yale-team@lists.sourceforge.net
* web: http://yale.cs.uni-dortmund.de/
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package edu.udo.cs.yale.example;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.RandomGenerator;
import edu.udo.cs.yale.generator.FeatureGenerator;
import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
/** Provides some tools for calculation of certain measures and feature generation.
*
* @version $Id: Tools.java,v 2.18 2004/09/04 20:07:02 ingomierswa Exp $
*/
public class Tools {
// ================================================================================
// TABLE CREATION
// ================================================================================
/**
 * Overwrites every value of the given example table with a random number.
 * According to the original documentation, a freshly created MemoryExampleTable
 * contains only Double.NaN values; this method replaces them.
 * For each data row and each non-null attribute of the table, the value is drawn
 * from the range given by the attribute's minimum and maximum, using the global
 * random generator.
 *
 * @param exampleTable the table whose rows are filled in place
 */
public static void fillTableWithRandomValues(ExampleTable exampleTable) {
    RandomGenerator generator = RandomGenerator.getGlobalRandomGenerator();
    DataRowReader rows = exampleTable.getDataReader();
    Attribute[] columns = exampleTable.getAttributes();
    while (rows.hasNext()) {
        DataRow row = rows.next();
        for (int col = 0; col < columns.length; col++) {
            Attribute column = columns[col];
            // The attribute array may contain empty slots; those are skipped.
            if (column == null) {
                continue;
            }
            row.set(column,
                    generator.nextDoubleInRange(column.getMinimum(),
                                                column.getMaximum()));
        }
    }
}
// ================================================================================
// -------------------- INFORMATION GAIN --------------------------------------
// ================================================================================
/**
 * Returns the attribute with the best information gain.
 *
 * @param exampleSet the examples to evaluate; its label must be nominal
 * @param ratioGain if true, the ratio gain criterion is used
 * @return the attribute with the largest gain, or <code>null</code> if no attribute
 *         produced a gain greater than negative infinity (e.g. the example set
 *         has no attributes)
 * @throws OperatorException a UserError (code 101) if the label is not nominal
 */
public static Attribute getMostInformativeAttribute(ExampleSet exampleSet, boolean ratioGain) throws OperatorException {
    Attribute label = exampleSet.getLabel();
    boolean nominalLabel = Ontology.ATTRIBUTE_VALUE_TYPE.isA(label.getValueType(), Ontology.NOMINAL);
    if (!nominalLabel) {
        throw new UserError(null, 101,
                            new Object[] { "information gain", label.getName() });
    }

    // The entropy of the complete set is the same for every attribute, so compute it once.
    double setEntropy = getEntropy(exampleSet);

    Attribute winner = null;
    double winnerGain = Double.NEGATIVE_INFINITY;
    int index = 0;
    while (index < exampleSet.getNumberOfAttributes()) {
        Attribute candidate = exampleSet.getAttribute(index);
        double gain = getInformationGain(exampleSet, candidate, setEntropy, ratioGain);
        // Strict comparison: on ties the earlier attribute is kept.
        if (gain > winnerGain) {
            winner = candidate;
            winnerGain = gain;
        }
        index++;
    }
    return winner;
}
/**
 * Convenience overload: computes the entropy of the example set itself and then
 * delegates to the variant that takes the entropy as an argument.
 *
 * @param exampleSet the examples to evaluate
 * @param attribute the attribute to split on
 * @param ratioGain if true, the ratio gain criterion is used
 * @return the value returned by the four-argument overload
 */
public static double getInformationGain(ExampleSet exampleSet, Attribute attribute, boolean ratioGain) {
    double setEntropy = getEntropy(exampleSet);
    return getInformationGain(exampleSet, attribute, setEntropy, ratioGain);
}
/**
 * Returns the information gain (or, optionally, the gain ratio) obtained by
 * splitting the example set on a single attribute.
 * <p>
 * Nominal attributes are split by attribute value. All other attributes are split
 * into two subsets at the threshold determined by
 * {@link #getThreshold(ExampleSet, Attribute)}.
 * The gain is the given entropy minus the size-weighted sum of the entropies of
 * the subsets. Empty subsets contribute nothing.
 *
 * @param exampleSet the examples to evaluate
 * @param attribute the attribute to split on
 * @param entropy the entropy of the complete example set, as returned by
 *        {@link #getEntropy(ExampleSet)}
 * @param ratioGain if true, the gain is divided by the split information
 *        (gain ratio criterion)
 * @return the information gain, or the gain ratio if <code>ratioGain</code> is set;
 *         the gain ratio is defined as 0.0 when the split information is zero,
 *         i.e. when all examples fall into a single subset
 */
public static double getInformationGain(ExampleSet exampleSet, Attribute attribute, double entropy, boolean ratioGain) {
    int size = exampleSet.getSize();

    SplittedExampleSet splittedES;
    if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NOMINAL)) {
        splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute);
    } else {
        double threshold = getThreshold(exampleSet, attribute);
        splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute, threshold);
    }

    // infoX: expected entropy after the split; splitInfoX: entropy of the split itself.
    double infoX = 0.0;
    double splitInfoX = 0.0;
    for (int n = 0; n < splittedES.getNumberOfSubsets(); n++) {
        splittedES.selectSingleSubset(n);
        int subsetSize = splittedES.getSize();
        if (subsetSize == 0) {
            // An empty subset has no weight; skipping it also avoids 0 * log(0) = NaN.
            continue;
        }
        double fraction = (double)subsetSize / (double)size;
        // Weight the entropy of THIS subset (not of the whole set) by its relative size.
        infoX += fraction * getEntropy(splittedES);
        if (ratioGain) {
            // Accumulate over all subsets; log(fraction) <= 0, so subtracting keeps it non-negative.
            splitInfoX -= fraction * (Math.log(fraction) / Math.log(2));
        }
    }

    double informationGain = entropy - infoX;
    if (!ratioGain) {
        return informationGain;
    }
    if (splitInfoX <= 0.0) {
        // Everything ended up in one subset: the split carries no information.
        return 0.0;
    }
    return informationGain / splitInfoX;
}
/**
 * Returns the information gain of every attribute of the example set.
 *
 * @param exampleSet the examples to evaluate; its label must be nominal
 * @param ratioGain if true, the ratio gain criterion is used
 * @return an array holding, at position <code>i</code>, the gain of the attribute
 *         with index <code>i</code>
 * @throws OperatorException a UserError (code 101) if the label is not nominal
 */
public static double[] getInformationGain(ExampleSet exampleSet, boolean ratioGain) throws OperatorException {
    Attribute label = exampleSet.getLabel();
    boolean nominalLabel = Ontology.ATTRIBUTE_VALUE_TYPE.isA(label.getValueType(), Ontology.NOMINAL);
    if (!nominalLabel) {
        throw new UserError(null, 101, new Object[] {"information gain", label.getName()});
    }

    double[] gains = new double[exampleSet.getNumberOfAttributes()];
    // Shared by all attributes, so it is computed only once.
    double setEntropy = getEntropy(exampleSet);
    int index = 0;
    while (index < exampleSet.getNumberOfAttributes()) {
        Attribute current = exampleSet.getAttribute(index);
        gains[index] = getInformationGain(exampleSet, current, setEntropy, ratioGain);
        index++;
    }
    return gains;
}
/**
 * Returns the entropy (base-2 logarithm) of the label distribution of the
 * given example set.
 *
 * @param exampleSet the examples whose labels are counted
 * @return the negated sum of <code>p * log2(p)</code> over all label values that occur
 */
public static double getEntropy(ExampleSet exampleSet) {
    // Count how many examples fall into each class.
    int[] classCounts = new int[exampleSet.getLabel().getValues().size()];
    for (ExampleReader reader = exampleSet.getExampleReader(); reader.hasNext(); ) {
        // Label values are shifted by FIRST_CLASS_INDEX to obtain a zero-based array index.
        int classIndex = (int)reader.next().getLabel() - Attribute.FIRST_CLASS_INDEX;
        classCounts[classIndex]++;
    }

    // Sum up p * log2(p); classes without examples are skipped.
    double sum = 0.0;
    for (int n = 0; n < classCounts.length; n++) {
        if (classCounts[n] == 0) {
            continue;
        }
        double p = (double)classCounts[n] / (double)exampleSet.getSize();
        sum += p * (Math.log(p) / Math.log(2));
    }
    return -sum;
}
/**
 * Returns a split threshold for the given attribute. Every value the attribute
 * takes in the example set is tried as a threshold; the one for which the sum of
 * the entropies of the two resulting subsets is smallest is returned. On ties the
 * first such value wins. The attribute must be continuous.
 * <p>
 * NOTE(review): the two subset entropies are added without weighting them by
 * subset size - confirm that this is intended.
 *
 * @param exampleSet the examples to split
 * @param attribute the attribute whose values are used as candidate thresholds
 * @return the chosen threshold, or Double.NaN if no candidate was accepted
 *         (e.g. the example set contains no examples)
 */
public static double getThreshold(ExampleSet exampleSet, Attribute attribute) {
    // Collect all candidate values first, before any splitting takes place.
    double[] candidates = new double[exampleSet.getSize()];
    ExampleReader reader = exampleSet.getExampleReader();
    for (int pos = 0; reader.hasNext(); pos++) {
        candidates[pos] = reader.next().getValue(attribute);
    }

    double chosen = Double.NaN;
    double lowestEntropySum = Double.POSITIVE_INFINITY;
    for (int n = 0; n < candidates.length; n++) {
        double candidate = candidates[n];
        SplittedExampleSet parts = SplittedExampleSet.splitByAttribute(exampleSet, attribute, candidate);

        parts.selectSingleSubset(0);
        double entropySum = getEntropy(parts);
        parts.selectSingleSubset(1);
        entropySum += getEntropy(parts);

        if (entropySum < lowestEntropySum) {
            lowestEntropySum = entropySum;
            chosen = candidate;
        }
    }
    return chosen;
}
// ================================================================================
// -------------------- GENERATION --------------------
// ================================================================================
/**
 * Copies the attributes of the example set into a new array, in index order.
 *
 * @param exampleSet the example set to read the attributes from
 * @return a new array with one entry per attribute
 */
public static Attribute[] createAttributeArray(ExampleSet exampleSet) {
    Attribute[] result = new Attribute[exampleSet.getNumberOfAttributes()];
    int index = 0;
    while (index < exampleSet.getNumberOfAttributes()) {
        result[index] = exampleSet.getAttribute(index);
        index++;
    }
    return result;
}
/**
 * Picks one of the generator's input candidates uniformly at random, using the
 * global random generator. Each candidate returned by the generator is expected
 * to be an <code>Attribute[]</code>.
 *
 * @param exampleSet the example set passed on to the generator
 * @param generator the feature generator that supplies the input candidates
 * @param maxDepth passed on to the generator
 * @param functions passed on to the generator
 * @return a randomly chosen candidate, or <code>null</code> if there are none
 */
public static Attribute[] getRandomCompatibleAttributes(ExampleSet exampleSet,
                                                        FeatureGenerator generator,
                                                        int maxDepth, String[] functions) {
    List candidates = generator.getInputCandidates(exampleSet, maxDepth, functions);
    if (candidates.size() <= 0) {
        return null;
    }
    int pick = RandomGenerator.getGlobalRandomGenerator().nextInt(candidates.size());
    return (Attribute[])candidates.get(pick);
}
/**
 * Picks one of the generator's input candidates at random, where the chance of a
 * candidate is proportional to the sum of the weights of its attributes. Each
 * candidate returned by the generator is expected to be an <code>Attribute[]</code>.
 * <p>
 * If the weights do not add up to a positive number (all zero, negative, or
 * containing NaN), no meaningful distribution exists and a candidate is chosen
 * uniformly instead.
 *
 * @param exampleSet supplies the attribute weights and is passed on to the generator
 * @param generator the feature generator that supplies the input candidates
 * @param maxDepth passed on to the generator
 * @param functions passed on to the generator
 * @return the chosen candidate, or <code>null</code> if there are none (same
 *         convention as <code>getRandomCompatibleAttributes</code>)
 */
public static Attribute[] getWeightedCompatibleAttributes(AttributeWeightedExampleSet exampleSet,
                                                          FeatureGenerator generator,
                                                          int maxDepth, String[] functions) {
    List inputAttributes = generator.getInputCandidates(exampleSet, maxDepth, functions);
    if (inputAttributes.isEmpty()) {
        return null;
    }

    // probs[k] collects the total weight of the k-th candidate.
    double[] probs = new double[inputAttributes.size()];
    double probSum = 0.0d;
    int k = 0;
    for (Iterator i = inputAttributes.iterator(); i.hasNext(); k++) {
        Attribute[] candidate = (Attribute[])i.next();
        for (int j = 0; j < candidate.length; j++) {
            double weight = exampleSet.getWeight(candidate[j]);
            probs[k] += weight;
            probSum += weight;
        }
    }

    RandomGenerator random = RandomGenerator.getGlobalRandomGenerator();
    if (!(probSum > 0.0d)) {
        // Also true for NaN: normalising would only produce NaN probabilities.
        return (Attribute[])inputAttributes.get(random.nextInt(inputAttributes.size()));
    }

    for (int j = 0; j < probs.length; j++) {
        probs[j] /= probSum;
    }
    return (Attribute[])inputAttributes.get(random.randomIndex(probs));
}
// ================================================================================
// P r o b a b i l i t i e s
// ================================================================================
/**
 * Returns the mean of the absolute attribute weights of the example set.
 * Weights that are NaN are ignored, both in the sum and in the count.
 *
 * @param exampleSet the example set that supplies the weights
 * @return the average absolute weight; NaN if no weight is defined (0.0 / 0)
 */
public static double getAverageWeight(AttributeWeightedExampleSet exampleSet) {
    double absoluteSum = 0.0d;
    int defined = 0;
    for (int index = 0; index < exampleSet.getNumberOfAttributes(); index++) {
        double current = exampleSet.getWeight(index);
        if (Double.isNaN(current)) {
            continue;
        }
        absoluteSum += Math.abs(current);
        defined++;
    }
    return absoluteSum / (double)defined;
}
/**
 * Calculates selection probabilities from the attribute weights, without
 * inversion. Delegates to the three-argument overload with
 * <code>inverse = false</code>.
 *
 * @param attributes the attributes to calculate probabilities for
 * @param exampleSet the example set that supplies the weights
 * @return one probability per attribute, in the order of <code>attributes</code>
 */
public static double[] getProbabilitiesFromWeights(Attribute[] attributes, AttributeWeightedExampleSet exampleSet) {
    final boolean inverse = false;
    return getProbabilitiesFromWeights(attributes, exampleSet, inverse);
}
/**
 * Calculates inverse selection probabilities from the attribute weights.
 * Delegates to <code>getProbabilitiesFromWeights</code> with
 * <code>inverse = true</code>.
 *
 * @param attributes the attributes to calculate probabilities for
 * @param exampleSet the example set that supplies the weights
 * @return one probability per attribute, in the order of <code>attributes</code>
 */
public static double[] getInverseProbabilitiesFromWeights(Attribute[] attributes, AttributeWeightedExampleSet exampleSet) {
    final boolean inverse = true;
    return getProbabilitiesFromWeights(attributes, exampleSet, inverse);
}
/**
 * Calculates probabilities for attribute selection purposes based on the
 * absolute values of the attribute weights. Attributes whose weight is not
 * defined (NaN) get a probability corresponding to the average of the defined
 * weights.
 * <p>
 * Inverse probabilities can be calculated for cases where attributes with a high
 * weight should be selected with small probability; the score of an attribute is
 * then <code>2 * average - |weight|</code>. Scores that would become negative
 * (weights above twice the average) are set to zero and the remaining scores are
 * renormalised, so the result never contains negative probabilities.
 * <p>
 * If no weight is defined, or all defined weights are zero, every attribute gets
 * the same probability.
 *
 * @param attributes the attributes to calculate probabilities for
 * @param exampleSet the example set that supplies the weights
 * @param inverse if true, high weights lead to small probabilities
 * @return one probability per attribute, in the order of <code>attributes</code>
 */
public static double[] getProbabilitiesFromWeights(Attribute[] attributes,
                                                   AttributeWeightedExampleSet exampleSet,
                                                   boolean inverse) {
    double[] probs = new double[attributes.length];

    // First pass: remember the raw weights and sum up the defined ones.
    double definedSum = 0.0d;
    int counter = 0;
    for (int i = 0; i < attributes.length; i++) {
        double weight = exampleSet.getWeight(attributes[i]);
        probs[i] = weight;
        if (!Double.isNaN(weight)) {
            definedSum += Math.abs(weight);
            counter++;
        }
    }

    // Without any usable weight the formulas below would yield 0/0 = NaN.
    if (counter == 0 || definedSum == 0.0d) {
        for (int i = 0; i < probs.length; i++) {
            probs[i] = 1.0d / probs.length;
        }
        return probs;
    }

    double weightAverage = definedSum / counter;
    // Undefined weights are counted as if they had the average weight.
    double weightSum = definedSum + (attributes.length - counter) * weightAverage;

    // Second pass: turn the raw weights into unnormalised scores.
    boolean clamped = false;
    for (int i = 0; i < probs.length; i++) {
        double weight = probs[i];
        double score;
        if (Double.isNaN(weight)) {
            score = weightAverage;
        } else if (inverse) {
            score = 2 * weightAverage - Math.abs(weight);
            if (score < 0.0d) {
                score = 0.0d;
                clamped = true;
            }
        } else {
            score = Math.abs(weight);
        }
        probs[i] = score;
    }

    // The scores add up to weightSum unless some of them had to be clamped.
    double total = weightSum;
    if (clamped) {
        total = 0.0d;
        for (int i = 0; i < probs.length; i++) {
            total += probs[i];
        }
    }
    for (int i = 0; i < probs.length; i++) {
        probs[i] /= total;
    }
    return probs;
}
/**
 * Selects one attribute at random according to the given probabilities. A random
 * number is drawn from the global random generator and the first attribute whose
 * cumulative probability exceeds it is returned.
 *
 * @param attributes the attributes to choose from
 * @param probs the probability of each attribute, in the same order
 * @return the selected attribute; the last attribute if the cumulative sum never
 *         exceeds the random number
 */
public static Attribute selectAttribute(Attribute[] attributes, double[] probs) {
    double drawn = RandomGenerator.getGlobalRandomGenerator().nextDouble();
    double cumulative = 0.0d;
    int index = 0;
    while (index < attributes.length) {
        cumulative += probs[index];
        if (drawn < cumulative) {
            return attributes[index];
        }
        index++;
    }
    return attributes[attributes.length - 1];
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -