📄 statistics.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @author Victor Borichev
* @version 1.0
*/
package com.prudsys.pdm.Models.Statistics.Algorithms;
import java.util.Calendar;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Statistics.AllGroup;
import com.prudsys.pdm.Models.Statistics.CategoricalGroup;
import com.prudsys.pdm.Models.Statistics.ContingencyEntry;
import com.prudsys.pdm.Models.Statistics.Group;
import com.prudsys.pdm.Models.Statistics.GroupingParameter;
import com.prudsys.pdm.Models.Statistics.NumericGroup;
import com.prudsys.pdm.Models.Statistics.StatisticsAlgorithm;
import com.prudsys.pdm.Models.Statistics.TimeGroup;
import com.prudsys.pdm.Models.Statistics.TimeStat;
public class Statistics extends StatisticsAlgorithm
{
// Integer tags named after the two attribute kinds. They are not referenced in
// the part of the class visible here; presumably used by callers or by methods
// further down the file -- TODO confirm.
public final static int CATEGORICAL = 1;
public final static int NUMERIC = 2;
// Root of the group tree. Assigned by makeGroups() and exposed to subclasses and
// package code through getRootGroup(); it has no initializer, so it is null
// until something assigns it.
private Group rootGroup;
// Pair of values stored by setMultiAttributes(int, int); both default to -1.
// NOTE(review): these look like attribute indices consumed by makeMultiStat(),
// with -1 meaning "not set" -- confirm against that method.
int firstMultiAttr = -1;
int secondMultiAttr = -1;
/**
 * Creates a new instance. The constructor performs no work of its own beyond
 * invoking the superclass constructor; the fields keep their declared
 * initial values.
 */
public Statistics()
{
    super();
}
/**
 * Runs the algorithm by invoking {@code makeSimpleStat()},
 * {@code makeAdvancedStat()} and {@code makeMultiStat()}, in that order.
 *
 * @throws MiningException if one of the three steps raises it; later steps
 *         are then not executed
 */
protected void runAlgorithm() throws MiningException {
    this.makeSimpleStat();
    this.makeAdvancedStat();
    this.makeMultiStat();
}
/**
 * Returns the root of the group tree held by this instance.
 *
 * @return the current root group; {@code null} if it has not been assigned
 *         yet (in the code visible here it is assigned by {@code makeGroups()})
 */
protected Group getRootGroup() {
    return this.rootGroup;
}
/**
 * Stores the two values used for the multi-attribute statistics.
 * No validation is performed on either argument.
 *
 * @param one value stored in {@code firstMultiAttr}
 * @param two value stored in {@code secondMultiAttr}
 */
public void setMultiAttributes(int one, int two)
{
    firstMultiAttr = one;
    secondMultiAttr = two;
}
/**
 * Builds the tree of groups described by the {@code grouping} parameters,
 * stores it as the root group of this instance and returns it.
 * <p>
 * The input stream is reset and scanned once to collect, per grouping
 * parameter: a value-to-count table (categorical attributes), the minimum and
 * maximum value (non-time numeric attributes) or a {@link TimeStat} (time
 * attributes). From these, one array of template groups is created per
 * grouping parameter, and the templates are then copied into a tree whose
 * depth equals the number of grouping parameters.
 *
 * @return the root of the newly built group tree (an {@link AllGroup})
 * @throws MiningException if raised while resetting or reading the input
 *         stream or while creating groups
 */
public Group makeGroups() throws MiningException {
    rootGroup = new AllGroup(null);
    miningInputStream.reset();
    int length = grouping.size();

    // Resolve every grouping attribute once up front instead of once per
    // input row; the lookups do not depend on the row being read.
    int[] groupAttrs = new int[length];
    MiningAttribute[] attributes = new MiningAttribute[length];
    java.util.Hashtable[] hashes = new java.util.Hashtable[length];
    double[] min = new double[length];
    double[] max = new double[length];
    TimeStat[] times = new TimeStat[length];
    for (int i = 0; i < length; i++) {
        groupAttrs[i] = ((GroupingParameter) grouping.get(i)).getAttribute();
        attributes[i] = (MiningAttribute) metaData.getMiningAttribute(groupAttrs[i]);
        min[i] = Double.MAX_VALUE;
        max[i] = -Double.MAX_VALUE;
        hashes[i] = new java.util.Hashtable();
        times[i] = new TimeStat();
    }

    // Single pass over the data: gather the per-attribute statistics.
    while (miningInputStream.next()) {
        MiningVector vec = miningInputStream.read();
        for (int i = 0; i < length; i++) {
            double value = vec.getValue(groupAttrs[i]);
            // NaN values are ignored for every attribute type
            // (presumably they encode missing values -- TODO confirm).
            if (Double.isNaN(value)) {
                continue;
            }
            if (attributes[i] instanceof CategoricalAttribute) {
                // Negative infinity is not counted as a category
                // (NOTE(review): looks like an "unknown category" marker -- confirm).
                if (value == Double.NEGATIVE_INFINITY) {
                    continue;
                }
                Double key = new Double(value);
                Long count = (Long) hashes[i].get(key);
                if (count == null) {
                    hashes[i].put(key, new Long(1));
                } else {
                    hashes[i].put(key, new Long(count.longValue() + 1));
                }
            } else if (!((NumericAttribute) attributes[i]).isTime()) {
                if (value < min[i]) {
                    min[i] = value;
                }
                if (value > max[i]) {
                    max[i] = value;
                }
            } else {
                times[i].addTime(value);
            }
        }
    }

    // One array of template groups per grouping parameter.
    Group[][] groups = new Group[length][];
    for (int i = 0; i < length; i++) {
        GroupingParameter parameter = (GroupingParameter) grouping.get(i);
        int maxGroups = parameter.getMaxGroups();
        if (attributes[i] instanceof CategoricalAttribute) {
            groups[i] = createCategoricalGroups(groupAttrs[i], maxGroups,
                    parameter.getOrdering(), hashes[i]);
        } else if (!((NumericAttribute) attributes[i]).isTime()) {
            groups[i] = createNumericGroups(groupAttrs[i], maxGroups, min[i], max[i]);
        } else {
            groups[i] = createTimeGroups(groupAttrs[i], maxGroups, times[i]);
        }
    }

    addGroups(rootGroup, groups, 0);
    return rootGroup;
}

/**
 * Creates the template groups for one categorical attribute from its
 * value-to-count table. If {@code maxGroups} is not -1 and there are more
 * distinct values than {@code maxGroups}, only {@code maxGroups - 1} value
 * groups are created and the last slot is filled with an {@link AllGroup}.
 */
private Group[] createCategoricalGroups(int groupAttr, int maxGroups, int ordering,
        java.util.Hashtable valueCounts) throws MiningException {
    int distinct = valueCounts.size();
    double[] values = new double[distinct];
    long[] counters = new long[distinct];
    java.util.Enumeration em = valueCounts.keys();
    int j = 0;
    while (em.hasMoreElements()) {
        Double key = (Double) em.nextElement();
        values[j] = key.doubleValue();
        counters[j++] = ((Long) valueCounts.get(key)).longValue();
    }
    // With an ordering requested the values are arranged by their counters;
    // otherwise they stay in the enumeration order of the hash table.
    if (ordering != GroupingParameter.NO_ORDERING) {
        sortValues(counters, values);
    }

    Group[] result;
    int numGroups;
    if (maxGroups != -1 && values.length > maxGroups) {
        numGroups = maxGroups - 1;
        result = new Group[maxGroups];
        result[maxGroups - 1] = new AllGroup(null);
        result[maxGroups - 1].metaData = metaData;
    } else {
        numGroups = values.length;
        result = new Group[values.length];
    }

    // INCREASING and NO_ORDERING walk the value array front to back, any
    // other ordering walks it back to front.
    boolean frontToBack = ordering == GroupingParameter.INCREASING
            || ordering == GroupingParameter.NO_ORDERING;
    for (j = 0; j < numGroups; j++) {
        double value = frontToBack ? values[j] : values[values.length - j - 1];
        result[j] = new CategoricalGroup(null, groupAttr, value);
        result[j].metaData = metaData;
    }
    return result;
}

/**
 * Creates equal-width interval groups covering {@code [lo, hi]} for one
 * non-time numeric attribute. The number of intervals is {@code maxGroups},
 * or the integer part of the square root of the range when
 * {@code maxGroups} is -1.
 */
private Group[] createNumericGroups(int groupAttr, int maxGroups, double lo, double hi)
        throws MiningException {
    double range = hi - lo;
    int numGroups;
    if (maxGroups != -1) {
        numGroups = maxGroups;
    } else {
        numGroups = (int) java.lang.Math.sqrt(range);
        // A range below 1 (constant column, values within [0,1), ...) truncates
        // to zero intervals, which would leave this level empty and cut off
        // every deeper grouping level. Use a single interval instead. A
        // negative range means no value was observed; that case is left as is.
        if (numGroups < 1 && range >= 0) {
            numGroups = 1;
        }
    }
    double step = range / (double) numGroups;
    double d = lo;
    Group[] result = new Group[numGroups];
    for (int j = 0; j < numGroups; j++, d = lo + (double) j * range / (double) numGroups) {
        result[j] = new NumericGroup(null, groupAttr, d, d + step);
        result[j].metaData = metaData;
    }
    return result;
}

/**
 * Creates one {@link TimeGroup} per unit value reported by the collected
 * {@link TimeStat} for the unit it selects for {@code maxGroups}.
 */
private Group[] createTimeGroups(int groupAttr, int maxGroups, TimeStat stat)
        throws MiningException {
    int unit = stat.getGroupUnit(maxGroups);
    int[] unitValues = stat.getUnitValues(unit);
    Group[] result = new Group[unitValues.length];
    for (int j = 0; j < unitValues.length; j++) {
        result[j] = new TimeGroup(null, groupAttr, unit, unitValues[j]);
        result[j].metaData = metaData;
    }
    return result;
}
/**
 * Recursively hangs copies of the template groups below {@code parent}.
 * <p>
 * Every template in {@code groups[level]} is copied, the copy gets
 * {@code parent} as its parent, and the copy's own subtree is built from the
 * next level before the finished child array is attached to {@code parent}.
 * Because each node receives its own copies, the number of nodes on a level
 * is the product of the template counts of that level and all levels above it.
 *
 * @param parent node that receives the children created for {@code level}
 * @param groups template groups, one array per level
 * @param level  index into {@code groups}; nothing happens once it reaches
 *               {@code groups.length}
 */
private void addGroups(Group parent, Group[][] groups, int level)
{
    if (level >= groups.length) {
        return;
    }

    Group[] templates = groups[level];
    Group[] children = new Group[templates.length];
    int nextLevel = level + 1;
    for (int idx = 0; idx < children.length; idx++) {
        Group child = templates[idx].makeCopy();
        children[idx] = child;
        child.setParent(parent);
        addGroups(child, groups, nextLevel);
    }
    parent.setChildren(children);
}
private void sortValues(long[] counters, double[] values) {
int i,j;
long swapLong;
double swapDouble;
for(i=0;i<counters.length;i++)
for(j=i+1;j<counters.length;j++)
if(counters[j]<counters[i])
{
swapLong = counters[j];
counters[j] = counters[i];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -