tools.java

来自「一个很好的LIBSVM的JAVA源码。对于要研究和改进SVM算法的学者。可以参考」· Java 代码 · 共 332 行
JAVA
332 行
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2001-2004
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, 
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale-team@lists.sourceforge.net
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as 
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version. 
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.example;

import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.RandomGenerator;
import edu.udo.cs.yale.generator.FeatureGenerator;

import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;

/** Provides some tools for calculation of certain measures and feature generation.
 *
 *  @version $Id: Tools.java,v 2.18 2004/09/04 20:07:02 ingomierswa Exp $
 */
public class Tools {

    // ================================================================================
    //          TABLE CREATION
    // ================================================================================

    /** Overwrites the values of the given example table with random numbers. According to the
     *  original documentation a freshly created MemoryExampleTable contains only Double.NaN, so this
     *  method can be used to initialise it. For every data row delivered by the table's reader, each
     *  non-null attribute is set to a random value drawn between that attribute's minimum and maximum.
     *
     *  @param exampleTable the table whose rows are filled in place
     */
    public static void fillTableWithRandomValues(ExampleTable exampleTable) {
        RandomGenerator rng = RandomGenerator.getGlobalRandomGenerator();
        DataRowReader rows = exampleTable.getDataReader();
        Attribute[] columns = exampleTable.getAttributes();
        while (rows.hasNext()) {
            DataRow row = rows.next();
            for (int c = 0; c < columns.length; c++) {
                Attribute column = columns[c];
                // Slots without an attribute are left untouched.
                if (column == null) {
                    continue;
                }
                row.set(column, rng.nextDoubleInRange(column.getMinimum(), column.getMaximum()));
            }
        }
    }

    // ================================================================================
    // -------------------- INFORMATION GAIN --------------------------------------
    // ================================================================================

    /** Determines the attribute of the example set with the largest information gain.
     *
     *  @param exampleSet the examples; its label must be nominal
     *  @param ratioGain if true, the gain ratio criterion is used instead of the plain information gain
     *  @return the attribute with the largest value, or null if no attribute yields a value greater
     *          than negative infinity (for example if there are no attributes or all values are NaN)
     *  @throws OperatorException a UserError if the label of the example set is not nominal
     */
    public static Attribute getMostInformativeAttribute(ExampleSet exampleSet, boolean ratioGain) throws OperatorException {
        boolean nominalLabel = Ontology.ATTRIBUTE_VALUE_TYPE.isA(exampleSet.getLabel().getValueType(), Ontology.NOMINAL);
        if (!nominalLabel) {
            Object[] arguments = new Object[] { "information gain", exampleSet.getLabel().getName() };
            throw new UserError(null, 101, arguments);
        }

        // The entropy of the whole set is the same for every attribute, so compute it once.
        double setEntropy = getEntropy(exampleSet);

        Attribute winner = null;
        double winnerGain = Double.NEGATIVE_INFINITY;
        int index = 0;
        while (index < exampleSet.getNumberOfAttributes()) {
            Attribute candidate = exampleSet.getAttribute(index);
            double candidateGain = getInformationGain(exampleSet, candidate, setEntropy, ratioGain);
            // Strict comparison: the first attribute wins ties and NaN values are never selected.
            if (candidateGain > winnerGain) {
                winner = candidate;
                winnerGain = candidateGain;
            }
            index++;
        }
        return winner;
    }

    
    /** Convenience variant that computes the entropy of the complete example set itself and then
     *  delegates to the variant taking a precomputed entropy.
     *
     *  @param exampleSet the examples
     *  @param attribute the attribute to evaluate
     *  @param ratioGain if true, the gain ratio criterion is used
     *  @return the information gain (or gain ratio) of the attribute
     */
    public static double getInformationGain(ExampleSet exampleSet, Attribute attribute, boolean ratioGain) {
        double setEntropy = getEntropy(exampleSet);
        return getInformationGain(exampleSet, attribute, setEntropy, ratioGain);
    }

    /** Returns the information gain of one attribute, or its gain ratio if requested.
     *  Nominal attributes split the example set by attribute value; all other attributes are split
     *  at the threshold delivered by {@link #getThreshold(ExampleSet, Attribute)}. The gain is the
     *  given entropy minus the size-weighted sum of the entropies of the resulting subsets.
     *
     *  @param exampleSet the examples
     *  @param attribute the attribute to evaluate
     *  @param entropy the entropy of the complete example set, e.g. from {@link #getEntropy(ExampleSet)}
     *  @param ratioGain if true, the information gain is divided by the split information (gain ratio)
     *  @return the information gain or gain ratio; 0 if the gain ratio is requested but the split
     *          information is zero (the attribute does not actually split the examples)
     */
    public static double getInformationGain(ExampleSet exampleSet, Attribute attribute, double entropy, boolean ratioGain) {
        int size               = exampleSet.getSize();
        double infoX           = 0.0;
        double splitInfoX      = 0.0;

        SplittedExampleSet splittedES = null;
        if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NOMINAL)) {
            splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute);
        } else {
            double threshold = getThreshold(exampleSet, attribute);
            splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute, threshold);
        }

        // Accumulate the weighted subset entropies (infoX) and, if needed, the split information.
        for (int n = 0; n < splittedES.getNumberOfSubsets(); n++) {
            splittedES.selectSingleSubset(n);
            int subsetSize = splittedES.getSize();
            // An empty subset contributes nothing to either sum; skipping it also avoids
            // 0 * log(0), which evaluates to NaN and would poison the sums.
            if (subsetSize == 0) {
                continue;
            }
            double split = (double)subsetSize / (double)size;
            // Weight the entropy of THIS subset, not the entropy of the whole set: using the
            // latter makes infoX equal to the total entropy and the gain always zero.
            infoX += split * getEntropy(splittedES);
            if (ratioGain) {
                double splitLogarithm = Math.log(split) / Math.log(2);
                // The split information is a sum over all subsets, so accumulate it.
                splitInfoX += split * splitLogarithm;
            }
        }

        // Information gain of the attribute.
        double informationGain = entropy - infoX;

        // Optionally turn the information gain into the gain ratio.
        if (ratioGain) {
            // Zero split information means all examples ended up in one subset; the ratio is
            // undefined there, so report "no gain" instead of NaN or infinity.
            if (splitInfoX == 0.0) {
                return 0.0;
            }
            informationGain = informationGain / -splitInfoX;
        }

        return informationGain;
    }

    /** Computes the information gain (or gain ratio) of every attribute of the example set.
     *
     *  @param exampleSet the examples; its label must be nominal
     *  @param ratioGain if true, the gain ratio criterion is used
     *  @return one value per attribute, in attribute index order
     *  @throws OperatorException a UserError if the label of the example set is not nominal
     */
    public static double[] getInformationGain(ExampleSet exampleSet, boolean ratioGain) throws OperatorException {
        boolean nominalLabel = Ontology.ATTRIBUTE_VALUE_TYPE.isA(exampleSet.getLabel().getValueType(), Ontology.NOMINAL);
        if (!nominalLabel) {
            Object[] arguments = new Object[] {"information gain", exampleSet.getLabel().getName()};
            throw new UserError(null, 101, arguments);
        }

        double[] gains = new double[exampleSet.getNumberOfAttributes()];
        // The entropy of the whole set is shared by all attributes, so compute it only once.
        double setEntropy = getEntropy(exampleSet);

        int index = 0;
        while (index < exampleSet.getNumberOfAttributes()) {
            Attribute current = exampleSet.getAttribute(index);
            gains[index] = getInformationGain(exampleSet, current, setEntropy, ratioGain);
            index++;
        }
        return gains;
    }

    /** Computes the entropy (base 2) of the label distribution of the given example set.
     *
     *  @param exampleSet the examples whose label values are counted
     *  @return the negated sum of p * log2(p) over all label values that occur at least once
     */
    public static double getEntropy(ExampleSet exampleSet) {
        // Count how many examples fall into each class.
        int[] labelCounts = new int[exampleSet.getLabel().getValues().size()];
        for (ExampleReader examples = exampleSet.getExampleReader(); examples.hasNext(); ) {
            // Label values are offset by FIRST_CLASS_INDEX; shift them to a zero-based index.
            int labelIndex = (int)examples.next().getLabel() - Attribute.FIRST_CLASS_INDEX;
            labelCounts[labelIndex]++;
        }

        // Sum up p * log2(p) over all classes that actually occur.
        double weightedLogSum = 0.0;
        for (int c = 0; c < labelCounts.length; c++) {
            // Classes without examples are skipped, which avoids evaluating log(0).
            if (labelCounts[c] == 0) {
                continue;
            }
            double p = (double)labelCounts[c] / (double)exampleSet.getSize();
            double log2OfP = Math.log(p) / Math.log(2);
            weightedLogSum += p * log2OfP;
        }
        return -weightedLogSum;
    }


    /** Returns the best split threshold for the given attribute, i.e. the attribute value for which
     *  the two resulting subsets have the lowest size-weighted entropy (and hence the highest
     *  information gain). Every value of the attribute occurring in the example set is tried as a
     *  threshold. The attribute must be continuous.
     *
     *  @param exampleSet the examples
     *  @param attribute the continuous attribute to split on
     *  @return the best threshold, or Double.NaN if the example set delivers no examples
     */
    public static double getThreshold(ExampleSet exampleSet, Attribute attribute) {
        double bestThreshold = Double.NaN;
        double bestSplitEntropy = Double.POSITIVE_INFINITY;

        int size = exampleSet.getSize();
        double[] values = new double[size];
        ExampleReader reader = exampleSet.getExampleReader();
        int i = 0;
        while (reader.hasNext()) {
            values[i++] = reader.next().getValue(attribute);
        }

        for (int n = 0; n < values.length; n++) {
            double threshold = values[n];

            SplittedExampleSet splittedES = SplittedExampleSet.splitByAttribute(exampleSet, attribute, threshold);

            // Weight each subset's entropy by its share of the examples, as getInformationGain
            // does. An unweighted sum lets a degenerate split with one empty subset score just
            // the entropy of the whole set, which often beats every real split.
            double splitEntropy = 0.0;
            for (int subset = 0; subset < 2; subset++) {
                splittedES.selectSingleSubset(subset);
                double fraction = (double)splittedES.getSize() / (double)size;
                splitEntropy += fraction * getEntropy(splittedES);
            }

            // Strict comparison: the first of several equally good thresholds is kept.
            if (splitEntropy < bestSplitEntropy) {
                bestSplitEntropy = splitEntropy;
                bestThreshold = threshold;
            }
        }

        return bestThreshold;
    }


    // ================================================================================
    // -------------------- GENERATION --------------------
    // ================================================================================

    /** Copies the attributes of the example set into a new array, in attribute index order.
     *
     *  @param exampleSet the example set whose attributes are collected
     *  @return a new array with one entry per attribute
     */
    public static Attribute[] createAttributeArray(ExampleSet exampleSet) {
        Attribute[] result = new Attribute[exampleSet.getNumberOfAttributes()];
        int index = 0;
        while (index < exampleSet.getNumberOfAttributes()) {
            result[index] = exampleSet.getAttribute(index);
            index++;
        }
        return result;
    }

    /** Picks one of the input candidates of the given generator at random, using the global
     *  random generator.
     *
     *  @param exampleSet the example set the candidates are taken from
     *  @param generator the generator that is asked for its input candidates
     *  @param maxDepth passed through to the generator
     *  @param functions passed through to the generator
     *  @return a randomly chosen candidate, or null if the generator delivers no candidates
     */
    public static Attribute[] getRandomCompatibleAttributes(ExampleSet exampleSet, 
                                                            FeatureGenerator generator, 
                                                            int maxDepth, String[] functions) {
        List candidates = generator.getInputCandidates(exampleSet, maxDepth, functions);
        // Nothing to choose from.
        if (!(candidates.size() > 0)) {
            return null;
        }
        int chosen = RandomGenerator.getGlobalRandomGenerator().nextInt(candidates.size());
        return (Attribute[])candidates.get(chosen);
    }

    /** Picks one of the input candidates of the given generator at random, where the probability
     *  of each candidate is proportional to the sum of the weights of its attributes.
     *
     *  @param exampleSet the weighted example set delivering the candidates and attribute weights
     *  @param generator the generator that is asked for its input candidates
     *  @param maxDepth passed through to the generator
     *  @param functions passed through to the generator
     *  @return the chosen candidate, or null if the generator delivers no candidates (the same
     *          convention as getRandomCompatibleAttributes)
     */
    public static Attribute[] getWeightedCompatibleAttributes(AttributeWeightedExampleSet exampleSet, 
                                                              FeatureGenerator generator, 
                                                              int maxDepth, String[] functions) {
        List inputAttributes = generator.getInputCandidates(exampleSet, maxDepth, functions);
        // Without candidates there is nothing to select.
        if (inputAttributes.isEmpty()) {
            return null;
        }

        double[] probs = new double[inputAttributes.size()];
        double probSum = 0.0d;
        int k = 0;
        for (Iterator i = inputAttributes.iterator(); i.hasNext(); k++) {
            Attribute[] candidate = (Attribute[])i.next();
            for (int j = 0; j < candidate.length; j++) {
                double weight = exampleSet.getWeight(candidate[j]);
                probSum += weight;
                // Accumulate into the slot of THIS candidate. Previously the index was never
                // advanced and the value was overwritten, so only probs[0] was ever set.
                probs[k] += weight;
            }
        }

        // Normalise so that the candidate probabilities sum to one.
        for (int j = 0; j < probs.length; j++) {
            probs[j] /= probSum;
        }
        return (Attribute[])inputAttributes.get(RandomGenerator.getGlobalRandomGenerator().randomIndex(probs));
    }


    // ================================================================================
    //          P r o b a b i l i t i e s
    // ================================================================================


    /** Computes the mean absolute weight over all attributes whose weight is defined (not NaN).
     *
     *  @param exampleSet the weighted example set
     *  @return the average of the absolute weights; NaN if no attribute has a defined weight
     */
    public static double getAverageWeight(AttributeWeightedExampleSet exampleSet) {
        double absoluteSum = 0.0d;
        int definedWeights = 0;
        for (int index = 0; index < exampleSet.getNumberOfAttributes(); index++) {
            double current = exampleSet.getWeight(index);
            // Undefined weights do not take part in the average.
            if (Double.isNaN(current)) {
                continue;
            }
            absoluteSum += Math.abs(current);
            definedWeights++;
        }
        return absoluteSum / (double)definedWeights;
    }

    /** Calculates selection probabilities that grow with the attribute weights. Delegates to the
     *  three-argument variant with the inverse flag switched off.
     *
     *  @param attributes the attributes to calculate probabilities for
     *  @param exampleSet the weighted example set delivering the weights
     *  @return one probability per attribute
     */
    public static double[] getProbabilitiesFromWeights(Attribute[] attributes, AttributeWeightedExampleSet exampleSet) {
        final boolean inverse = false;
        return getProbabilitiesFromWeights(attributes, exampleSet, inverse);
    }

    /** Calculates inverse selection probabilities, i.e. attributes with a high weight get a small
     *  probability. Delegates to the three-argument variant with the inverse flag switched on.
     *
     *  @param attributes the attributes to calculate probabilities for
     *  @param exampleSet the weighted example set delivering the weights
     *  @return one probability per attribute
     */
    public static double[] getInverseProbabilitiesFromWeights(Attribute[] attributes, AttributeWeightedExampleSet exampleSet) {
        final boolean inverse = true;
        return getProbabilitiesFromWeights(attributes, exampleSet, inverse);
    }

    /** Calculates probabilities for attribute selection purposes based on the attribute weights.
     *  Attributes whose weight is not defined (NaN) get a probability corresponding to the average
     *  absolute weight of the attributes with a defined weight. Inverse probabilities can be
     *  calculated for cases where attributes with a high weight should be selected with small
     *  probability. If no attribute has a defined weight, or all defined weights are zero, a
     *  uniform distribution is returned because the weights carry no information.
     *
     *  @param attributes the attributes to calculate probabilities for
     *  @param exampleSet the weighted example set delivering the weights
     *  @param inverse if true, a weight w is mapped to (2 * average - |w|) instead of |w|
     *  @return one probability per attribute, in the order of the given array
     */
    public static double[] getProbabilitiesFromWeights(Attribute[] attributes, 
                                                       AttributeWeightedExampleSet exampleSet, 
                                                       boolean inverse) {
        // Fetch each weight once; it is needed for both the sum and the final probabilities.
        double[] weights = new double[attributes.length];
        double weightSum = 0.0d;
        int counter = 0;
        for (int i = 0; i < attributes.length; i++) {
            weights[i] = exampleSet.getWeight(attributes[i]);
            if (!Double.isNaN(weights[i])) {
                weightSum += Math.abs(weights[i]);
                counter++;
            }
        }

        double[] probs = new double[attributes.length];

        // Without any usable weight the divisions below would yield NaN for every attribute;
        // fall back to selecting all attributes with equal probability.
        if ((counter == 0) || (weightSum == 0.0d)) {
            for (int i = 0; i < probs.length; i++) {
                probs[i] = 1.0d / probs.length;
            }
            return probs;
        }

        double weightAverage = weightSum / counter;
        // Undefined weights are treated as if they had the average weight.
        weightSum += (attributes.length - counter) * weightAverage;

        for (int i = 0; i < probs.length; i++) {
            if (Double.isNaN(weights[i])) {
                probs[i] = weightAverage / weightSum;
            } else if (inverse) {
                // NOTE(review): this is negative for weights whose magnitude exceeds twice the
                // average; confirm whether callers rely on that or whether it should be clamped.
                probs[i] = (2 * weightAverage - Math.abs(weights[i])) / weightSum;
            } else {
                probs[i] = Math.abs(weights[i]) / weightSum;
            }
        }
        return probs;
    }

    /** Selects one attribute at random according to the given probabilities. A random number is
     *  drawn from the global random generator and the first attribute for which the running sum
     *  of the probabilities exceeds that number is returned.
     *
     *  @param attributes the attributes to choose from
     *  @param probs the selection probability of each attribute, in the same order
     *  @return the selected attribute; the last attribute if the running sum never exceeds the
     *          random number
     */
    public static Attribute selectAttribute(Attribute[] attributes, double[] probs) {
        double drawn = RandomGenerator.getGlobalRandomGenerator().nextDouble();
        double cumulative = 0.0d;
        int index = 0;
        while (index < attributes.length) {
            cumulative += probs[index];
            if (cumulative > drawn) {
                return attributes[index];
            }
            index++;
        }
        // Fallback, e.g. when rounding keeps the cumulative sum below the drawn number.
        return attributes[attributes.length - 1];
    }
}
tools.java - 源码说明

本页面展示了「一个很好的LIBSVM的JAVA源码。对于要研究和改进SVM算法的学者。可以参考。来自数据挖掘工具YALE工具包。」中的 tools.java 源码文件，采用 Java 编程语言编写，共 332 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与LIBSVM相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?