📄 distribution.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.classifiers;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.io.*;
import java.util.*;
import org.agentacademy.modules.dataminer.core.*;
/**
* Class for handling a distribution of class values.
*
*/
public class Distribution implements Cloneable, Serializable {
/**
 * Weight of instances per class per bag, indexed as
 * {@code [bagIndex][classIndex]}.
 */
private double m_perClassPerBag[][];
/** Weight of instances per bag, indexed by bag. */
private double m_perBag[];
/** Weight of instances per class, accumulated over all bags and indexed by class. */
private double m_perClass[];
/** Total weight of all instances counted in this distribution. */
private double totaL;
/**
 * Builds an empty distribution of the requested size. Every per-bag,
 * per-class and total weight starts at zero.
 *
 * @param numBags number of bags to allocate
 * @param numClasses number of classes tracked in each bag
 */
public Distribution(int numBags,int numClasses) {
  // A single multi-dimensional allocation yields one zero-filled row of
  // numClasses entries for every bag.
  m_perClassPerBag = new double [numBags][numClasses];
  m_perBag = new double [numBags];
  m_perClass = new double [numClasses];
  totaL = 0;
}
/**
* Creates and initializes a new distribution using the given
* array. WARNING: it just copies a reference to this array.
*/
public Distribution(double [][] table) {
int i, j;
m_perClassPerBag = table;
m_perBag = new double [table.length];
m_perClass = new double [table[0].length];
for (i = 0; i < table.length; i++)
for (j = 0; j < table[i].length; j++) {
m_perBag[i] += table[i][j];
m_perClass[j] += table[i][j];
totaL += table[i][j];
}
}
/**
* Creates a distribution with only one bag according
* to instances in source.
*
* @exception Exception if something goes wrong
*/
public Distribution(Instances source) throws Exception {
m_perClassPerBag = new double [1][0];
m_perBag = new double [1];
totaL = 0;
m_perClass = new double [source.numClasses()];
m_perClassPerBag[0] = new double [source.numClasses()];
Enumeration enum = source.enumerateInstances();
while (enum.hasMoreElements())
add(0,(Instance) enum.nextElement());
}
/**
 * Creates a distribution according to the given instances and split
 * model. The model supplies the number of bags and, for every instance,
 * either the bag it belongs to or a set of per-bag weights.
 *
 * @param source the instances to distribute over the bags
 * @param modelToUse the split model that assigns instances to bags
 * @exception Exception if adding an instance fails
 */
public Distribution(Instances source,
                    ClassifierSplitModel modelToUse)
     throws Exception {
  m_perClassPerBag = new double [modelToUse.numSubsets()][0];
  m_perBag = new double [modelToUse.numSubsets()];
  totaL = 0;
  m_perClass = new double [source.numClasses()];
  for (int i = 0; i < modelToUse.numSubsets(); i++) {
    m_perClassPerBag[i] = new double [source.numClasses()];
  }
  // The enumeration variable must not be called "enum": that word has
  // been a reserved keyword since Java 5 and no longer compiles as an
  // identifier.
  Enumeration instEnum = source.enumerateInstances();
  while (instEnum.hasMoreElements()) {
    Instance instance = (Instance) instEnum.nextElement();
    int index = modelToUse.whichSubset(instance);
    if (index != -1) {
      add(index, instance);
    } else {
      // -1 means the model did not pick a single bag; spread the
      // instance over all bags using the weights the model provides.
      double[] weights = modelToUse.weights(instance);
      addWeights(instance, weights);
    }
  }
}
/**
* Creates distribution with only one bag by merging all
* bags of given distribution.
*/
public Distribution(Distribution toMerge) {
totaL = toMerge.totaL;
m_perClass = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClass,0,m_perClass,0,toMerge.numClasses());
m_perClassPerBag = new double [1] [0];
m_perClassPerBag[0] = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClass,0,m_perClassPerBag[0],0,
toMerge.numClasses());
m_perBag = new double [1];
m_perBag[0] = totaL;
}
/**
* Creates distribution with two bags by merging all bags apart of
* the indicated one.
*/
public Distribution(Distribution toMerge, int index) {
int i;
totaL = toMerge.totaL;
m_perClass = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClass,0,m_perClass,0,toMerge.numClasses());
m_perClassPerBag = new double [2] [0];
m_perClassPerBag[0] = new double [toMerge.numClasses()];
System.arraycopy(toMerge.m_perClassPerBag[index],0,m_perClassPerBag[0],0,
toMerge.numClasses());
m_perClassPerBag[1] = new double [toMerge.numClasses()];
for (i=0;i<toMerge.numClasses();i++)
m_perClassPerBag[1][i] = toMerge.m_perClass[i]-m_perClassPerBag[0][i];
m_perBag = new double [2];
m_perBag[0] = toMerge.m_perBag[index];
m_perBag[1] = totaL-m_perBag[0];
}
/**
 * Counts the non-empty bags, i.e. the bags whose weight
 * {@code Utils.gr} reports as greater than zero.
 *
 * @return the number of non-empty bags
 */
public final int actualNumBags() {
  int nonEmpty = 0;
  int bag = 0;
  while (bag < m_perBag.length) {
    if (Utils.gr(m_perBag[bag], 0)) {
      nonEmpty += 1;
    }
    bag++;
  }
  return nonEmpty;
}
/**
 * Counts the classes actually occurring in the distribution, i.e. the
 * classes whose weight {@code Utils.gr} reports as greater than zero.
 *
 * @return the number of classes with positive weight
 */
public final int actualNumClasses() {
  int present = 0;
  int cls = 0;
  while (cls < m_perClass.length) {
    if (Utils.gr(m_perClass[cls], 0)) {
      present += 1;
    }
    cls++;
  }
  return present;
}
/**
 * Counts the classes actually occurring in the given bag, i.e. the
 * classes whose weight in that bag {@code Utils.gr} reports as greater
 * than zero.
 *
 * @param bagIndex the bag to inspect
 * @return the number of classes with positive weight in that bag
 */
public final int actualNumClasses(int bagIndex) {
  int present = 0;
  int cls = 0;
  while (cls < m_perClass.length) {
    if (Utils.gr(m_perClassPerBag[bagIndex][cls], 0)) {
      present += 1;
    }
    cls++;
  }
  return present;
}
/**
 * Adds the given instance to the given bag: its weight is added to the
 * bag/class cell, the bag total, the class total and the overall total.
 *
 * @param bagIndex the bag that receives the instance
 * @param instance the instance to add; its class value is truncated to
 *        an int and used as the class index
 * @exception Exception if something goes wrong
 */
public final void add(int bagIndex,Instance instance)
     throws Exception {
  final int cls = (int)instance.classValue();
  final double w = instance.weight();

  m_perClassPerBag[bagIndex][cls] += w;
  m_perBag[bagIndex] += w;
  m_perClass[cls] += w;
  totaL += w;
}
/**
 * Removes the given instance from the given bag: its weight is
 * subtracted from the bag/class cell, the bag total, the class total
 * and the overall total.
 *
 * @param bagIndex the bag the instance is taken from
 * @param instance the instance to subtract; its class value is
 *        truncated to an int and used as the class index
 * @exception Exception if something goes wrong
 */
public final void sub(int bagIndex,Instance instance)
     throws Exception {
  final int cls = (int)instance.classValue();
  final double w = instance.weight();

  m_perClassPerBag[bagIndex][cls] -= w;
  m_perBag[bagIndex] -= w;
  m_perClass[cls] -= w;
  totaL -= w;
}
/**
 * Adds a vector of per-class counts to the given bag and updates the
 * bag, class and overall totals accordingly.
 *
 * @param bagIndex the bag that receives the counts
 * @param counts weight to add for each class, indexed by class
 */
public final void add(int bagIndex, double[] counts) {
  final double addedWeight = Utils.sum(counts);
  int cls;

  for (cls = 0; cls < counts.length; cls++) {
    m_perClassPerBag[bagIndex][cls] += counts[cls];
  }
  m_perBag[bagIndex] += addedWeight;

  for (cls = 0; cls < counts.length; cls++) {
    m_perClass[cls] += counts[cls];
  }
  totaL += addedWeight;
}
/**
* Adds all instances with unknown values for given attribute, weighted
* according to frequency of instances in each bag.
*
* @exception Exception if something goes wrong
*/
public final void addInstWithUnknown(Instances source,
int attIndex)
throws Exception {
double [] probs;
double weight,newWeight;
int classIndex;
Instance instance;
int j;
probs = new double [m_perBag.length];
for (j=0;j<m_perBag.length;j++) {
if (Utils.eq(totaL, 0)) {
probs[j] = 1.0 / probs.length;
} else {
probs[j] = m_perBag[j]/totaL;
}
}
Enumeration enum = source.enumerateInstances();
while (enum.hasMoreElements()) {
instance = (Instance) enum.nextElement();
if (instance.isMissing(attIndex)) {
classIndex = (int)instance.classValue();
weight = instance.weight();
m_perClass[classIndex] = m_perClass[classIndex]+weight;
totaL = totaL+weight;
for (j = 0; j < m_perBag.length; j++) {
newWeight = probs[j]*weight;
m_perClassPerBag[j][classIndex] = m_perClassPerBag[j][classIndex]+
newWeight;
m_perBag[j] = m_perBag[j]+newWeight;
}
}
}
}
/**
 * Adds the instances at positions {@code startIndex} (inclusive) to
 * {@code lastPlusOne} (exclusive) of {@code source} to the given bag.
 *
 * @param bagIndex the bag that receives the instances
 * @param source the instances to take the range from
 * @param startIndex index of the first instance to add
 * @param lastPlusOne index one past the last instance to add
 * @exception Exception if something goes wrong
 */
public final void addRange(int bagIndex,Instances source,
                           int startIndex, int lastPlusOne)
     throws Exception {
  double rangeWeight = 0;
  int pos = startIndex;
  while (pos < lastPlusOne) {
    final Instance current = (Instance) source.instance(pos);
    final int cls = (int)current.classValue();
    final double w = current.weight();
    rangeWeight = rangeWeight + w;
    m_perClassPerBag[bagIndex][cls] += w;
    m_perClass[cls] += w;
    pos++;
  }
  // Bag and overall totals are updated once with the accumulated sum.
  m_perBag[bagIndex] += rangeWeight;
  totaL += rangeWeight;
}
/**
* Adds given instance to all bags weighting it according to given weights.
*
* @exception Exception if something goes wrong
*/
public final void addWeights(Instance instance,
double [] weights)
throws Exception {
int classIndex;
int i;
classIndex = (int)instance.classValue();
for (i=0;i<m_perBag.length;i++) {
double weight = instance.weight() * weights[i];
m_perClassPerBag[i][classIndex] = m_perClassPerBag[i][classIndex] + weight;
m_perBag[i] = m_perBag[i] + weight;
m_perClass[classIndex] = m_perClass[classIndex] + weight;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -