⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 statisticsbuild.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

 /**
  * Title: XELOPES Data Mining Library
  * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
  * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
  * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
  * @author Carsten Weisse
  * @author Michael Thess
  * @version 1.0
  */

package com.prudsys.pdm.Examples;

import java.io.FileWriter;
import java.util.Vector;

import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningAlgorithmParameter;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Statistics.CategoricalGroup;
import com.prudsys.pdm.Models.Statistics.ContingencyEntry;
import com.prudsys.pdm.Models.Statistics.Group;
import com.prudsys.pdm.Models.Statistics.GroupingParameter;
import com.prudsys.pdm.Models.Statistics.NumericGroup;
import com.prudsys.pdm.Models.Statistics.StatisticsMiningModel;
import com.prudsys.pdm.Models.Statistics.StatisticsSettings;
import com.prudsys.pdm.Models.Statistics.TimeGroup;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.PmmlUtils;

/**
 * Example that builds a statistics model with multidimensional grouping,
 * prints a textual report of it to standard output and writes it as plain
 * text ('data/pmml/StatisticsModel.txt') and as PMML
 * ('data/pmml/StatisticsModel.xml').
 */
public class StatisticsBuild extends BasisExample {

  /**
   * Empty constructor.
   */
  public StatisticsBuild() {
  }

  /**
   * Run the example of this class: read the ARFF transaction data, configure
   * grouping by two attributes, build the statistics model, print it and
   * write it to the two output files.
   *
   * @throws Exception error while example is running
   */
  public void runExample() throws Exception {

    // Open data source and get metadata:
    MiningInputStream inputData = new MiningArffStream( "data/arff/custom-transact.arff");
    MiningDataSpecification metaData = inputData.getMetaData();

    // Get target and grouping attributes:
    MiningAttribute univariateTargetAttribute = metaData.getMiningAttribute( "customerId" );
    MiningAttribute groupingAttribute1        = metaData.getMiningAttribute( "transactionPosition" );
    MiningAttribute groupingAttribute2        = metaData.getMiningAttribute( "itemId" );

    // Create MiningSettings object and assign metadata:
    StatisticsSettings miningSettings = new StatisticsSettings();
    miningSettings.setDataSpecification( metaData );

    // Assign settings: one grouping parameter per grouping attribute,
    // both using the same group limit and ordering constant.
    int maxGroups = 10;
    GroupingParameter gp1 = new GroupingParameter(
      metaData.getAttributeIndex(groupingAttribute1), maxGroups, GroupingParameter.INCREASING);
    GroupingParameter gp2 = new GroupingParameter(
      metaData.getAttributeIndex(groupingAttribute2), maxGroups, GroupingParameter.INCREASING);
    Vector groups = new Vector();
    groups.addElement( gp1 );
    groups.addElement( gp2 );
    miningSettings.setGrouping( groups );
    miningSettings.setUnivariateTarget(univariateTargetAttribute);
    miningSettings.verifySettings();

    // Generate mining algorithm specification directly:
    MiningAlgorithmSpecification miningAlgorithmSpecification =
      createMiningAlgorithmSpecification();

    // Get class name from algorithms specification:
    String className = miningAlgorithmSpecification.getClassname();
    if( className == null )
      throw new MiningException( "classname attribute expected." );

    // Set and display mining parameters:
    GeneralUtils.displayMiningAlgSpecParameters(miningAlgorithmSpecification);

    // Create algorithm object with default values:
    MiningAlgorithm algorithm = GeneralUtils.createMiningAlgorithmInstance(className);

    // Put it all together:
    algorithm.setMiningInputStream( inputData );
    algorithm.setMiningSettings( miningSettings );
    algorithm.setMiningAlgorithmSpecification( miningAlgorithmSpecification );
    algorithm.verify();

    // Build the mining model:
    MiningModel model = algorithm.buildModel();
    System.out.println("calculation time [s]: " + algorithm.getTimeSpentToBuildModel());

    // Show results:
    showStats((StatisticsMiningModel) model);

    // Write to text. The writer is closed in 'finally' so that buffered
    // output is flushed to disk and the file handle is released even if
    // writing fails.
    FileWriter textWriter = new FileWriter("data/pmml/StatisticsModel.txt");
    try {
      model.writePlainText(textWriter);
    }
    finally {
      textWriter.close();
    }

    // Write to PMML (same close-in-finally handling as above):
    FileWriter pmmlWriter = new FileWriter("data/pmml/StatisticsModel.xml");
    try {
      model.writePmml(pmmlWriter);
    }
    finally {
      pmmlWriter.close();
    }

    // Show in browser:
    // NOTE(review): the browser gets the bare file name although the PMML is
    // written below 'data/pmml/' - presumably PmmlUtils resolves the
    // directory itself; confirm.
    if (debug == 2) PmmlUtils.openPmmlBrowser("StatisticsModel.xml");
  }

  /**
   * Example of building a statistics model.
   *
   * @param args arguments (ignored)
   */
  public static void main(String[] args) {

    try {
      new StatisticsBuild().runExample();
    }
    catch (Exception ex) {
      ex.printStackTrace();
    }
  }

  /**
   * Create MiningAlgorithmSpecification directly instead
   * of reading from algorithms.xml file.
   *
   * @return new object of MiningAlgorithmSpecification
   */
  private static MiningAlgorithmSpecification createMiningAlgorithmSpecification() {

      MiningAlgorithmSpecification miningAlgorithmSpecification =
        new MiningAlgorithmSpecification();
      miningAlgorithmSpecification.setName("Statistics");
      miningAlgorithmSpecification.setFunction("StatisticalAnalysis");
      miningAlgorithmSpecification.setAlgorithm("DescriptiveAnalysis");
      miningAlgorithmSpecification.setClassname(
        "com.prudsys.pdm.Models.Statistics.Algorithms.Statistics");
      miningAlgorithmSpecification.setVersion("1.0");

      // This example passes an empty parameter array.
      MiningAlgorithmParameter[] miningAlgorithmParameter =
        new MiningAlgorithmParameter[0];
      miningAlgorithmSpecification.setInputAttribute(miningAlgorithmParameter);

      return miningAlgorithmSpecification;
  }

  /**
   * Show statistics model: prints the textual report of the whole group
   * tree, starting at the root group, to standard output.
   *
   * @param statsModel model of statistics
   */
  public static void showStats(StatisticsMiningModel statsModel) {

     // Get root group:
     Group rootGroup = statsModel.getRootGroup();

     // Call recursive statistics:
     String stat = getStat( rootGroup, 0, statsModel );

     System.out.println("Stats: " + stat);
  }

  /**
   * Print some tabs.
   *
   * @param num number of tabs; values of zero or less give an empty string
   * @return tabs
   */
  private static String printTabs(int num) {

    StringBuffer tabs = new StringBuffer();
    for (int i = 0; i < num; i++) {
      tabs.append('\t');
    }
    return tabs.toString();
  }

  /**
   * Get statistics data recursively.
   *
   * @param group group for output
   * @param level level in tree, only used for display formatting
   * @param statsModel statistics model
   * @return String representation of output
   */
  private static String getStat( Group group, int level, StatisticsMiningModel statsModel ) {

    // The metadata is the same for every group, so look it up once and
    // collect the whole report in a single buffer instead of concatenating
    // strings at every recursion level.
    MiningDataSpecification metaData = statsModel.getMiningSettings().getDataSpecification();
    StringBuffer out = new StringBuffer();
    appendStat(out, group, level, metaData);
    return out.toString();
  }

  /**
   * Appends the report of one group and, recursively, of all its subgroups.
   *
   * @param out buffer receiving the report
   * @param group group for output
   * @param level level in tree, only used for display formatting
   * @param metaData meta data used to resolve the grouping attributes
   */
  private static void appendStat( StringBuffer out, Group group, int level,
                                  MiningDataSpecification metaData ) {

    appendGroupHeader(out, group, level, metaData);
    appendLine(out, level, "Contains "+group.getCount()+" vectors");

    if (!group.isLeaf()) {
      appendLine(out, level+1, group.getChildCount()+" subgroups");
      for (int i = 0; i < group.getChildCount(); i++) {
        appendStat(out, (Group)group.getChildAt(i), level+1, metaData);
      }
      return;
    }

    // Leaf group: detailed statistics are only printed when it has vectors.
    long count = group.getCount();
    if (count == 0) {
      appendLine(out, level+1, "Empty group");
      return;
    }

    appendUnivariate(out, group, level);
    appendMultivariate(out, group, level);
    out.append("\n");
  }

  /**
   * Appends the line identifying a group: its kind, grouping attribute and
   * value or range. A group whose attribute index is -1 is reported as
   * "Root" when it has no parent and as "Other" otherwise.
   *
   * @param out buffer receiving the line
   * @param group group to describe
   * @param level indentation level
   * @param metaData meta data used to resolve the grouping attribute
   */
  private static void appendGroupHeader( StringBuffer out, Group group, int level,
                                         MiningDataSpecification metaData ) {

    int attrNum = group.getGroupAttribute();
    if (attrNum == -1) {
      if (group.getParent() == null) appendLine(out, level, "Root");
      else appendLine(out, level, "Other");
      return;
    }

    if (group instanceof CategoricalGroup) {
      CategoricalGroup categ = (CategoricalGroup)group;
      CategoricalAttribute attribute = (CategoricalAttribute)metaData.getMiningAttribute(attrNum);
      double value = categ.getValue();
      appendLine(out, level,
        "Categorical group: "+attribute.getName()+" with value: "+value+" ("+attribute.getCategory(value)+")");
    }
    else if (group instanceof NumericGroup) {
      NumericGroup numeric = (NumericGroup)group;
      NumericAttribute attribute = (NumericAttribute)metaData.getMiningAttribute(attrNum);
      double low = numeric.getLow();
      double high = numeric.getHigh();
      appendLine(out, level, "Numeric group: "+attribute.getName()+" ("+low+" .. "+high+")");
    }
    else {
      // Every remaining group kind is treated as a time group.
      TimeGroup time = (TimeGroup)group;
      NumericAttribute attribute = (NumericAttribute)metaData.getMiningAttribute(attrNum);
      int unit = time.getGroupUnit();
      int value = time.getGroupValue();
      appendLine(out, level,
        "Time group: "+attribute.getName()+" with unit "+timeUnitName(unit)+" = "+value);
    }
  }

  /**
   * Maps a java.util.Calendar field constant to the quoted unit label used
   * in the report.
   *
   * @param unit calendar field constant of the time group
   * @return quoted unit label, or an "Unknown" marker followed by the raw value
   */
  private static String timeUnitName(int unit) {

    switch (unit) {
      case java.util.Calendar.SECOND:        return "'Seconds'";
      case java.util.Calendar.MINUTE:        return "'Minute'";
      case java.util.Calendar.HOUR_OF_DAY:   return "'Hour'";
      case java.util.Calendar.DAY_OF_MONTH:  return "'Day'";
      case java.util.Calendar.WEEK_OF_MONTH: return "'Week'";
      case java.util.Calendar.MONTH:         return "'Month'";
      case java.util.Calendar.YEAR:          return "'Year'";
      default:                               return "'Unknown!!!' = "+unit;
    }
  }

  /**
   * Appends the univariate statistics section of a non-empty leaf group.
   *
   * @param out buffer receiving the section
   * @param group leaf group
   * @param level indentation level of the section title
   */
  private static void appendUnivariate( StringBuffer out, Group group, int level ) {

    appendLine(out, level, "Univariate:");
    appendLine(out, level+1, "Count: "+group.getCount());
    appendLine(out, level+1, "Min: "+group.getMin());
    appendLine(out, level+1, "Max: "+group.getMax());
    appendLine(out, level+1, "Range: "+group.getRange());
    appendLine(out, level+1, "Sum: "+group.getSum());
    appendLine(out, level+1, "Mean: "+group.getMean());
    appendLine(out, level+1, "Quartile 25: "+group.getQuart25());
    appendLine(out, level+1, "Quartile 50: "+group.getQuart50());
    appendLine(out, level+1, "Quartile 75: "+group.getQuart75());
    appendLine(out, level+1, "Interquartile range: "+group.getQuartRange());
    appendLine(out, level+1, "Variance: "+group.getVariance());
    appendLine(out, level+1, "Standart: "+group.getStandart());
    appendLine(out, level+1, "Skewness: "+group.getSkewness());
    appendLine(out, level+1, "Variation coefficient: "+group.getVarCoeff());
  }

  /**
   * Appends the multivariate section of a non-empty leaf group: the
   * correlation followed by the entries of the contingency table.
   *
   * @param out buffer receiving the section
   * @param group leaf group
   * @param level indentation level of the section title
   */
  private static void appendMultivariate( StringBuffer out, Group group, int level ) {

    appendLine(out, level, "Multivariate:");
    appendLine(out, level+1, "Correlation: "+group.getCorrelation());

    java.util.Enumeration em = group.getContingencyTable();
    if (em == null) {
      appendLine(out, level+1, "Contingency table is empty");
      return;
    }

    // The number of entries is printed before the entries themselves, so
    // the enumeration has to be collected first.
    java.util.Vector entries = new java.util.Vector();
    while (em.hasMoreElements()) {
      entries.add(em.nextElement());
    }
    appendLine(out, level+1, "Contingency table contains "+entries.size()+" elements");
    for (int i = 0; i < entries.size(); i++) {
      ContingencyEntry entry = (ContingencyEntry)entries.get(i);
      appendLine(out, level+2, entry.toString());
    }
  }

  /**
   * Appends one report line: indentation tabs, the text and a newline.
   *
   * @param out buffer receiving the line
   * @param indent number of leading tabs
   * @param text line content without trailing newline
   */
  private static void appendLine( StringBuffer out, int indent, String text ) {

    out.append(printTabs(indent)).append(text).append("\n");
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -