📄 distribution.java
字号:
/*
* YALE - Yet Another Learning Environment
* Copyright (C) 2001-2004
* Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
* Katharina Morik, Oliver Ritthoff
* Artificial Intelligence Unit
* Computer Science Department
* University of Dortmund
* 44221 Dortmund, Germany
* email: yale-team@lists.sourceforge.net
* web: http://yale.cs.uni-dortmund.de/
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package edu.udo.cs.yale.operator.learner.decisiontree.y45.j48;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
* Class for handling a distribution of class values.
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 1.3 $
*/
public class Distribution implements Cloneable, Serializable {
/** Weight of instances per class per bag; indexed as [bag][class]. */
private double m_perClassPerBag[][];
/** Weight of instances per bag (sum over all classes); indexed by bag. */
private double m_perBag[];
/** Weight of instances per class (sum over all bags); indexed by class. */
private double m_perClass[];
/** Total weight of instances over all bags and classes. */
private double totaL;
/**
 * Builds an empty distribution: every per-bag, per-class and
 * per-class-per-bag weight, as well as the total weight, starts at zero.
 *
 * @param numBags number of bags in the distribution
 * @param numClasses number of classes in the distribution
 */
public Distribution(int numBags,int numClasses) {
  // One allocation yields numBags rows of numClasses zero-initialized cells.
  m_perClassPerBag = new double[numBags][numClasses];
  m_perBag = new double[numBags];
  m_perClass = new double[numClasses];
  totaL = 0;
}
/**
 * Builds a distribution on top of the given weight table, where
 * table[bag][class] is the weight of a class inside a bag. The per-bag,
 * per-class and total weights are derived by summing the table.
 * WARNING: the table is not copied; only the reference is stored, so later
 * changes to the array are visible through this distribution.
 *
 * @param table weights indexed as [bag][class]; the number of classes is
 *              taken from the length of the first row
 */
public Distribution(double [][] table) {
  m_perClassPerBag = table;
  m_perBag = new double[table.length];
  m_perClass = new double[table[0].length];
  for (int bag = 0; bag < table.length; bag++) {
    double[] row = table[bag];
    for (int cls = 0; cls < row.length; cls++) {
      double cell = row[cls];
      m_perBag[bag] += cell;
      m_perClass[cls] += cell;
      totaL += cell;
    }
  }
}
/**
* Creates a distribution with only one bag according
* to instances in source.
*
* @exception Exception if something goes wrong
*/
public Distribution(Instances source) throws Exception {
m_perClassPerBag = new double [1][0];
m_perBag = new double [1];
totaL = 0;
m_perClass = new double [source.numClasses()];
m_perClassPerBag[0] = new double [source.numClasses()];
Enumeration enum = source.enumerateInstances();
while (enum.hasMoreElements())
add(0,(Instance) enum.nextElement());
}
/**
 * Creates a distribution according to given instances and split model.
 * The model determines the number of bags (one per subset) and the bag
 * each instance goes into.
 *
 * @param source instances to distribute over the bags
 * @param modelToUse split model supplying the subset count, the subset
 *                   of each instance and, where needed, per-bag weights
 * @exception Exception if something goes wrong
 */
public Distribution(Instances source,
                    ClassifierSplitModel modelToUse)
  throws Exception {
  int numBags = modelToUse.numSubsets();
  int numClasses = source.numClasses();
  m_perClassPerBag = new double[numBags][numClasses];
  m_perBag = new double[numBags];
  m_perClass = new double[numClasses];
  totaL = 0;
  // The enumeration must not be called "enum": that identifier is a
  // reserved keyword since Java 5 and no longer compiles.
  Enumeration instances = source.enumerateInstances();
  while (instances.hasMoreElements()) {
    Instance instance = (Instance) instances.nextElement();
    int index = modelToUse.whichSubset(instance);
    if (index != -1) {
      add(index, instance);
    } else {
      // -1 means the model names no single subset for this instance;
      // it is spread over all bags using the model's weights instead.
      addWeights(instance, modelToUse.weights(instance));
    }
  }
}
/**
* Creates distribution with only one bag by merging all
* bags of given distribution.
*/
public Distribution(Distribution toMerge) {
totaL = toMerge.totaL;
m_perClass = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClass,0,m_perClass,0,toMerge.numClasses());
m_perClassPerBag = new double [1] [0];
m_perClassPerBag[0] = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClass,0,m_perClassPerBag[0],0,
toMerge.numClasses());
m_perBag = new double [1];
m_perBag[0] = totaL;
}
/**
* Creates distribution with two bags by merging all bags apart of
* the indicated one.
*/
public Distribution(Distribution toMerge, int index) {
int i;
totaL = toMerge.totaL;
m_perClass = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClass,0,m_perClass,0,toMerge.numClasses());
m_perClassPerBag = new double [2] [0];
m_perClassPerBag[0] = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClassPerBag[index],0,m_perClassPerBag[0],0,
toMerge.numClasses());
m_perClassPerBag[1] = new double [toMerge.numClasses()];
for (i=0;i<toMerge.numClasses();i++)
m_perClassPerBag[1][i] = toMerge.m_perClass[i]-m_perClassPerBag[0][i];
m_perBag = new double [2];
m_perBag[0] = toMerge.m_perBag[index];
m_perBag[1] = totaL-m_perBag[0];
}
/**
 * Returns the number of non-empty bags, i.e. bags whose weight is
 * greater than zero according to Utils.gr.
 *
 * @return count of bags with positive weight
 */
public final int actualNumBags() {
  int nonEmpty = 0;
  for (int bag = 0; bag < m_perBag.length; bag++) {
    if (Utils.gr(m_perBag[bag], 0)) {
      nonEmpty += 1;
    }
  }
  return nonEmpty;
}
/**
 * Returns the number of classes actually occurring in the distribution,
 * i.e. classes whose weight is greater than zero according to Utils.gr.
 *
 * @return count of classes with positive weight
 */
public final int actualNumClasses() {
  int present = 0;
  for (int cls = 0; cls < m_perClass.length; cls++) {
    if (Utils.gr(m_perClass[cls], 0)) {
      present += 1;
    }
  }
  return present;
}
/**
 * Returns the number of classes actually occurring in the given bag,
 * i.e. classes whose weight in that bag is greater than zero according
 * to Utils.gr.
 *
 * @param bagIndex index of the bag to inspect
 * @return count of classes with positive weight in the bag
 */
public final int actualNumClasses(int bagIndex) {
  int present = 0;
  for (int cls = 0; cls < m_perClass.length; cls++) {
    if (Utils.gr(m_perClassPerBag[bagIndex][cls], 0)) {
      present += 1;
    }
  }
  return present;
}
/**
 * Adds the given instance to the given bag: its weight is added to the
 * bag/class cell, the bag total, the class total and the overall total.
 *
 * @param bagIndex index of the bag receiving the instance
 * @param instance instance whose class value (truncated to int) selects
 *                 the class and whose weight is added
 * @exception Exception if something goes wrong
 */
public final void add(int bagIndex,Instance instance)
  throws Exception {
  final int cls = (int) instance.classValue();
  final double w = instance.weight();

  m_perClassPerBag[bagIndex][cls] += w;
  m_perBag[bagIndex] += w;
  m_perClass[cls] += w;
  totaL += w;
}
/**
 * Subtracts the given instance from the given bag: its weight is removed
 * from the bag/class cell, the bag total, the class total and the overall
 * total.
 *
 * @param bagIndex index of the bag losing the instance
 * @param instance instance whose class value (truncated to int) selects
 *                 the class and whose weight is subtracted
 * @exception Exception if something goes wrong
 */
public final void sub(int bagIndex,Instance instance)
  throws Exception {
  final int cls = (int) instance.classValue();
  final double w = instance.weight();

  m_perClassPerBag[bagIndex][cls] -= w;
  m_perBag[bagIndex] -= w;
  m_perClass[cls] -= w;
  totaL -= w;
}
/**
 * Adds the given per-class counts to the given bag and updates the bag
 * total, the class totals and the overall total accordingly.
 *
 * @param bagIndex index of the bag receiving the counts
 * @param counts weights to add, indexed by class
 */
public final void add(int bagIndex, double[] counts) {
  final double added = Utils.sum(counts);

  // Bag/class cells first ...
  for (int cls = 0; cls < counts.length; cls++) {
    m_perClassPerBag[bagIndex][cls] =
      m_perClassPerBag[bagIndex][cls] + counts[cls];
  }
  m_perBag[bagIndex] += added;

  // ... then the class totals.
  for (int cls = 0; cls < counts.length; cls++) {
    m_perClass[cls] += counts[cls];
  }
  totaL += added;
}
/**
* Adds all instances with unknown values for given attribute, weighted
* according to frequency of instances in each bag.
*
* @exception Exception if something goes wrong
*/
public final void addInstWithUnknown(Instances source,
int attIndex)
throws Exception {
double [] probs;
double weight,newWeight;
int classIndex;
Instance instance;
int j;
probs = new double [m_perBag.length];
for (j=0;j<m_perBag.length;j++) {
if (Utils.eq(totaL, 0)) {
probs[j] = 1.0 / probs.length;
} else {
probs[j] = m_perBag[j]/totaL;
}
}
Enumeration enum = source.enumerateInstances();
while (enum.hasMoreElements()) {
instance = (Instance) enum.nextElement();
if (instance.isMissing(attIndex)) {
classIndex = (int)instance.classValue();
weight = instance.weight();
m_perClass[classIndex] = m_perClass[classIndex]+weight;
totaL = totaL+weight;
for (j = 0; j < m_perBag.length; j++) {
newWeight = probs[j]*weight;
m_perClassPerBag[j][classIndex] = m_perClassPerBag[j][classIndex]+
newWeight;
m_perBag[j] = m_perBag[j]+newWeight;
}
}
}
}
/**
 * Adds all instances in the index range [startIndex, lastPlusOne) of the
 * given set to the given bag.
 *
 * @param bagIndex index of the bag receiving the instances
 * @param source instances to take the range from
 * @param startIndex index of the first instance to add
 * @param lastPlusOne index one past the last instance to add
 * @exception Exception if something goes wrong
 */
public final void addRange(int bagIndex,Instances source,
                           int startIndex, int lastPlusOne)
  throws Exception {
  double rangeWeight = 0;
  for (int pos = startIndex; pos < lastPlusOne; pos++) {
    Instance current = (Instance) source.instance(pos);
    int cls = (int) current.classValue();
    double w = current.weight();
    rangeWeight += w;
    m_perClassPerBag[bagIndex][cls] += w;
    m_perClass[cls] += w;
  }
  // Bag total and overall total are updated once with the accumulated sum.
  m_perBag[bagIndex] += rangeWeight;
  totaL += rangeWeight;
}
/**
 * Adds the given instance to all bags, giving bag i the share
 * instance.weight() * weights[i] of the instance's weight.
 *
 * @param instance instance to distribute over the bags
 * @param weights per-bag factors, indexed by bag
 * @exception Exception if something goes wrong
 */
public final void addWeights(Instance instance,
                             double [] weights)
  throws Exception {
  final int cls = (int) instance.classValue();
  for (int bag = 0; bag < m_perBag.length; bag++) {
    double share = instance.weight() * weights[bag];
    m_perClassPerBag[bag][cls] += share;
    m_perBag[bag] += share;
    m_perClass[cls] += share;
    totaL += share;
  }
}
/**
* Checks if at least two bags contain a minimum number of instances.
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -