⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 statistics.java

📁 一个数据挖掘软件ALPHAMINER的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
 * @author Victor Borichev
 * @version 1.0
 */

package com.prudsys.pdm.Models.Statistics.Algorithms;

import java.util.Calendar;

import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Statistics.AllGroup;
import com.prudsys.pdm.Models.Statistics.CategoricalGroup;
import com.prudsys.pdm.Models.Statistics.ContingencyEntry;
import com.prudsys.pdm.Models.Statistics.Group;
import com.prudsys.pdm.Models.Statistics.GroupingParameter;
import com.prudsys.pdm.Models.Statistics.NumericGroup;
import com.prudsys.pdm.Models.Statistics.StatisticsAlgorithm;
import com.prudsys.pdm.Models.Statistics.TimeGroup;
import com.prudsys.pdm.Models.Statistics.TimeStat;


public class Statistics extends StatisticsAlgorithm
{
    // NOTE(review): CATEGORICAL and NUMERIC are not referenced in the visible
    // part of this class; presumably they tag an attribute kind - confirm
    // against the callers before relying on that reading.
    public final static int CATEGORICAL = 1;
    public final static int NUMERIC = 2;

    /** Root of the group hierarchy; assigned by makeGroups() and returned by getRootGroup(). */
    private Group rootGroup;
    /**
     * First attribute index of the pair stored by setMultiAttributes(); stays -1 until that
     * method is called. NOTE(review): presumably consumed by makeMultiStat() - confirm.
     */
    int firstMultiAttr = -1;
    /** Second attribute index of the pair stored by setMultiAttributes(); stays -1 until then. */
    int secondMultiAttr = -1;

    /**
     * Creates an instance with no multi-attribute pair selected (both indices
     * keep their initial value of -1) and no group hierarchy built yet.
     */
    public Statistics()
    {
        super();
    }

    /**
     * Runs the algorithm by invoking makeSimpleStat(), makeAdvancedStat() and
     * makeMultiStat(), strictly in that order.
     *
     * @throws MiningException declared so that a failure in any of the three
     *         steps can propagate to the caller
     */
    protected void runAlgorithm() throws MiningException {
        makeSimpleStat();
        makeAdvancedStat();
        makeMultiStat();
    }

    /**
     * Returns the root of the group hierarchy.
     *
     * @return the group most recently stored by makeGroups(), or null if
     *         makeGroups() has not assigned one yet
     */
    protected Group getRootGroup() {
        return this.rootGroup;
    }

    /**
     * Stores the pair of attribute indices kept in firstMultiAttr and
     * secondMultiAttr. No validation is performed on either value.
     *
     * @param one value stored as the first attribute index
     * @param two value stored as the second attribute index
     */
    public void setMultiAttributes(int one, int two)
    {
        firstMultiAttr = one;
        secondMultiAttr = two;
    }

    /**
     * Builds the group hierarchy for the configured grouping parameters,
     * stores its root in rootGroup and returns it.
     *
     * The method works in two phases:
     * <ol>
     * <li>The input stream is reset and scanned once. For every grouping level
     *     it collects, depending on the attribute type, an occurrence count per
     *     distinct value (categorical), the minimum and maximum (numeric,
     *     non-time) or a TimeStat (numeric, time). NaN values are skipped, and
     *     so is negative infinity for categorical attributes.</li>
     * <li>One array of template groups is created per level, and addGroups()
     *     copies these templates under every node of the previous level.</li>
     * </ol>
     *
     * Categorical levels: when maxGroups is not -1 and there are more distinct
     * values than maxGroups, only maxGroups-1 values get their own group and
     * the last slot is filled with an AllGroup.
     * NOTE(review): a maxGroups of 0 (or below -1) still fails in this branch
     * with an array index/size exception, as before - confirm whether callers
     * can pass such values.
     *
     * Numeric levels: the observed range is cut into equal-width intervals.
     * The interval count is maxGroups, or floor(sqrt(range)) when maxGroups is
     * -1. That automatic count used to be 0 for every range below 1 (including
     * a constant column), which left the level without groups and therefore
     * cut off all deeper levels; it is now raised to 1 in that case.
     *
     * @return the root group of the newly built hierarchy
     * @throws MiningException if reading the input stream or resolving an
     *         attribute from the meta data fails
     */
    public Group makeGroups() throws MiningException {
        rootGroup = new AllGroup(null);
        miningInputStream.reset();
        int length = grouping.size();

        // Per-level accumulators filled during the scan of the input stream.
        java.util.Hashtable[] hashes = new java.util.Hashtable[length];
        double[] min = new double[length];
        double[] max = new double[length];
        TimeStat[] times = new TimeStat[length];

        // Per-level lookups resolved once instead of once per record and level.
        GroupingParameter[] params = new GroupingParameter[length];
        int[] groupAttrs = new int[length];
        MiningAttribute[] attributes = new MiningAttribute[length];

        for(int i=0;i<length;i++)
        {
            min[i] = Double.MAX_VALUE;
            max[i] = -Double.MAX_VALUE;
            hashes[i] = new java.util.Hashtable();
            times[i] = new TimeStat();
            params[i] = (GroupingParameter)grouping.get(i);
            groupAttrs[i] = params[i].getAttribute();
            attributes[i] = (MiningAttribute)metaData.getMiningAttribute(groupAttrs[i]);
        }

        // Phase 1: single pass over the data, gathering the statistics that
        // determine the group boundaries of every level.
        while(miningInputStream.next())
        {
            MiningVector vec = miningInputStream.read();
            for(int i=0;i<length;i++)
            {
                double value = vec.getValue(groupAttrs[i]);
                if(Double.isNaN(value))
                {
                    continue;
                }
                if(attributes[i] instanceof CategoricalAttribute)
                {
                    if(value == Double.NEGATIVE_INFINITY)
                    {
                        continue;
                    }
                    // Count occurrences of each distinct category value.
                    Double dbl = new Double(value);
                    Long cntr = (Long)hashes[i].get(dbl);
                    if(cntr==null)
                    {
                        hashes[i].put(dbl,new Long(1));
                    }
                    else
                    {
                        hashes[i].put(dbl,new Long(cntr.longValue()+1));
                    }
                }
                else
                {
                    NumericAttribute numAttr = (NumericAttribute)attributes[i];
                    if(!numAttr.isTime())
                    {
                        if(value<min[i]) min[i] = value;
                        if(value>max[i]) max[i] = value;
                    }
                    else
                    {
                        times[i].addTime(value);
                    }
                }
            }
        }

        // Phase 2: create the template groups of every level.
        Group[][] groups = new Group[length][];
        for(int i=0;i<length;i++)
        {
            int groupAttr = groupAttrs[i];
            int maxGroups = params[i].getMaxGroups();
            MiningAttribute attribute = attributes[i];
            if(attribute instanceof CategoricalAttribute)
            {
                // Unpack the counting table into parallel arrays.
                double[] values = new double[hashes[i].size()];
                long[] counters = new long[hashes[i].size()];
                java.util.Enumeration em = hashes[i].keys();
                int j = 0;
                while(em.hasMoreElements())
                {
                    Double key = (Double)em.nextElement();
                    values[j] = key.doubleValue();
                    counters[j++] = ((Long)hashes[i].get(key)).longValue();
                }
                int ordering = params[i].getOrdering();
                if(ordering != GroupingParameter.NO_ORDERING)
                {
                    sortValues(counters,values);
                }
                int numGroups;
                if(maxGroups!=-1&&values.length > maxGroups)
                {
                    // Too many distinct values: the last slot becomes an AllGroup.
                    numGroups = maxGroups - 1;
                    groups[i] = new Group[maxGroups];
                    groups[i][maxGroups-1] = new AllGroup(null);
                    groups[i][maxGroups-1].metaData = metaData;
                }
                else
                {
                    numGroups = values.length;
                    groups[i] = new Group[values.length];
                }
                // INCREASING and NO_ORDERING take values from the front of the
                // array, every other ordering takes them from the back.
                boolean fromFront = ordering == GroupingParameter.INCREASING
                                 || ordering == GroupingParameter.NO_ORDERING;
                for(j=0;j<numGroups;j++)
                {
                    double groupValue = fromFront ? values[j] : values[values.length-j-1];
                    groups[i][j] = new CategoricalGroup(null,groupAttr,groupValue);
                    groups[i][j].metaData = metaData;
                }
            }
            else if(!((NumericAttribute)attribute).isTime())
            {
                double range = max[i] - min[i];
                int numGroups;
                if(maxGroups!=-1)
                {
                    numGroups = maxGroups;
                }
                else
                {
                    numGroups = (int)java.lang.Math.sqrt(range);
                    // The truncated square root is 0 for any range below 1; keep
                    // one interval so the level (and everything nested under it)
                    // still exists. A negative range means no value was seen at
                    // all, in which case the level stays empty as before.
                    if(numGroups < 1 && range >= 0)
                    {
                        numGroups = 1;
                    }
                }
                double step = range/(double)numGroups;
                groups[i] = new Group[numGroups];
                double lower = min[i];
                for(int j=0;j<numGroups;j++)
                {
                    groups[i][j] = new NumericGroup(null,groupAttr,lower,lower+step);
                    groups[i][j].metaData = metaData;
                    // Recompute from min rather than accumulating step, so that
                    // rounding errors do not add up across intervals.
                    lower = min[i]+(double)(j+1)*range/(double)numGroups;
                }
            }
            else
            {
                // Time attribute: TimeStat chooses the unit and its values.
                int unit = times[i].getGroupUnit(maxGroups);
                int[] unitValues = times[i].getUnitValues(unit);
                groups[i] = new Group[unitValues.length];
                for(int j=0;j<unitValues.length;j++)
                {
                    groups[i][j] = new TimeGroup(null,groupAttr,unit,unitValues[j]);
                    groups[i][j].metaData = metaData;
                }
            }
        }
        addGroups(rootGroup,groups,0);
        return rootGroup;
    }

  /**
   * Recursively attaches copies of the template groups to the hierarchy.
   * Every template of the given level is copied, linked to parent, and then
   * expanded with the templates of the next level before the complete child
   * array is handed to the parent.
   *
   * @param parent node that receives the copies as its children
   * @param groups template groups, one array per level
   * @param level index into groups of the level to attach; nothing happens
   *        once it reaches groups.length
   */
  private void addGroups(Group parent, Group[][] groups, int level)
  {
    if(level >= groups.length)
    {
      return;
    }
    Group[] templates = groups[level];
    Group[] children = new Group[templates.length];
    for(int k=0;k<templates.length;k++)
    {
      Group child = templates[k].makeCopy();
      child.setParent(parent);
      addGroups(child,groups,level+1);
      children[k] = child;
    }
    parent.setChildren(children);
  }

  private void sortValues(long[] counters, double[] values) {
    int i,j;
    long swapLong;
    double swapDouble;
    for(i=0;i<counters.length;i++)
      for(j=i+1;j<counters.length;j++)
        if(counters[j]<counters[i])
        {
          swapLong = counters[j];
          counters[j] = counters[i];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -