⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 associationrulestaxonomybuild.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

 /**
  * Title: XELOPES Data Mining Library
  * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
  * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
  * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
  * @author Carsten Weisse
  * @author Michael Thess
  * @version 1.0
  */
package com.prudsys.pdm.Examples;

import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Hashtable;

import com.prudsys.pdm.Automat.MiningAutomationAssignment;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.CategoryHierarchy;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.Records.Csv.MiningCsvStream;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesAlgorithm;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesMiningModel;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesSettings;
import com.prudsys.pdm.Models.AssociationRules.RulesNumberAssessment;
import com.prudsys.pdm.Models.AssociationRules.RulesNumberCallback;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.PmmlUtils;

/**
 * Builds an association rules model with a taxonomy and writes it to the
 * PMML file 'AssociationRulesTaxonomyModel.xml'.
 */
public class AssociationRulesTaxonomyBuild extends BasisExample {

  /**
   * Empty constructor.
   */
  public AssociationRulesTaxonomyBuild() {
  }

  /**
   * Run the example of this class.
   *
   * Reads 'data/csv/sessions.txt', attaches a taxonomy to the itemId
   * attribute, builds an association rules model with parameter
   * automation, prints the rules and writes the model to
   * 'data/pmml/AssociationRulesTaxonomyModel.xml'.
   *
   * @throws Exception error while example is running
   */
  public void runExample() throws Exception {

    // Create metadata and open csv source stream:
    MiningDataSpecification mds = new MiningDataSpecification();
    mds.setRelationName("Sessions");
    CategoricalAttribute transactId = new CategoricalAttribute("transactionId");
    CategoricalAttribute itemId = new CategoricalAttribute("itemId");
    NumericAttribute itemIndex = new NumericAttribute("itemIndex");
    mds.addMiningAttribute(transactId);
    mds.addMiningAttribute(itemIndex);
    mds.addMiningAttribute(itemId);
    MiningInputStream inputData = new MiningCsvStream( "data/csv/sessions.txt", mds );
    inputData.open();

    // Run through data to determine all categories of itemId (only for taxonomy):
    while (inputData.next()) {
      inputData.read();
    }
    inputData.reset();

    MiningDataSpecification metaData = inputData.getMetaData();

    // Get transactional attributes:
    CategoricalAttribute categoryItemId = (CategoricalAttribute)metaData.getMiningAttribute( "itemId" );
    CategoricalAttribute categoryTransactId = (CategoricalAttribute)metaData.getMiningAttribute( "transactionId" );

    // Add taxonomy of items to categoryItemId (for 'sessions.txt'):
    addTaxonomy(categoryItemId);

    // Create MiningSettings object and assign metadata:
    AssociationRulesSettings miningSettings = new AssociationRulesSettings();
    miningSettings.setDataSpecification( metaData );

    // Assign settings:
    miningSettings.setItemId( categoryItemId );
    miningSettings.setTransactionId( categoryTransactId );
    miningSettings.setMinimumConfidence( 0.3 );
    miningSettings.setMinimumSupport( 0.5 );
    miningSettings.verifySettings();

    // Get default mining algorithm specification (MAS) from 'algorithms.xml':
    MiningAlgorithmSpecification miningAlgorithmSpecification =
      MiningAlgorithmSpecification.getMiningAlgorithmSpecification( "AprioriSimple", null);
    if( miningAlgorithmSpecification == null ) {
      throw new MiningException( "Can't find application AprioriSimple." );
    }

    // Get class name from algorithms specification:
    String className = miningAlgorithmSpecification.getClassname();
    if( className == null ) {
      throw new MiningException( "classname attribute expected." );
    }

    // Set and display mining algorithm specification parameters:
    miningAlgorithmSpecification.setMAPValue("minimumItemSize", "1");
    miningAlgorithmSpecification.setMAPValue("maximumItemSize", "-1");
    GeneralUtils.displayMiningAlgSpecParameters(miningAlgorithmSpecification);

    // Create automation parameter, if automation is required:
    MiningAutomationAssignment maa = new MiningAutomationAssignment();
    maa.setMiningModelAssessment( new RulesNumberAssessment() );
    maa.setMiningAutomationCallback( new RulesNumberCallback() );
    maa.setMinAssessment(50);
    maa.setMaxAssessment(100);
    maa.setMaxIterationNumber(30);

    // Create algorithm object with default values:
    AssociationRulesAlgorithm algorithm = (AssociationRulesAlgorithm)
      GeneralUtils.createMiningAlgorithmInstance(className);

    // Put it all together:
    algorithm.setMiningInputStream( inputData );
    algorithm.setMiningSettings( miningSettings );
    algorithm.setMiningAlgorithmSpecification( miningAlgorithmSpecification );
    algorithm.setMiningAutomationAssignment( maa );
    // Parameter specific for AssociationRulesAlgorithm but not in MAS:
    algorithm.setUseTaxonomy(true);
    algorithm.setPruneRuleTaxType(
        AssociationRulesAlgorithm.TAX_REMOVE_ANCEST_AND_PARENTS_FROM_RULE );
    algorithm.setExportTransactIds(false);
    algorithm.setExportTransactItemNames(
        AssociationRulesMiningModel.EXPORT_PMML_NAME_TYPE_XELOPES );
    algorithm.verify();

    // Build the mining model with automation:
    MiningModel model = algorithm.buildModelWithAutomation();
    System.out.println("calculation time [s]: " + algorithm.getTimeSpentToBuildModel());

    // Show results:
    AssociationRulesBuild.showRules((AssociationRulesMiningModel) model);

    // Write to PMML. The writer is closed in any case so that buffered
    // output is flushed to disk and the file handle is released before
    // the file is possibly opened in the browser below:
    FileWriter writer = new FileWriter("data/pmml/AssociationRulesTaxonomyModel.xml");
    try {
      model.writePmml(writer);
    }
    finally {
      writer.close();
    }

    // Show in browser:
    if (debug == 2) {
      PmmlUtils.openPmmlBrowser("AssociationRulesTaxonomyModel.xml");
    }
  }

  /**
   * Example of building an association rules model with taxonomies
   * and parameter automation.
   *
   * @param args arguments (ignored)
   */
  public static void main(String[] args) {

    try {
      new AssociationRulesTaxonomyBuild().runExample();
    }
    catch (Exception ex) {
      ex.printStackTrace();
    }
  }

  /**
   * Add taxonomy to itemId attribute.
   *
   * This taxonomy operation works for simple hierarchical data
   * like it is often (implicitly) contained in web server
   * log files, namely the file 'sessions.txt'. It carries out the
   * following operation: To any category with string representation
   * {@code <n_1>.<n_2>. ... <n_k-1>.<n_k>.<category name 1>} the parent
   * category {@code <n_1>.<n_2>. ... <n_k-1>.<category name 2>} is
   * assigned. <p>
   *
   * Example:
   * The category '2.1.3.2.EC_Tech' has the parent '2.1.3.EC' which
   * in turn has the parent '2.1.Software', etc. <p>
   *
   * To all categories with single dot number like '3.Software' the
   * root category '0.Home' is assigned. (Of course, except to the
   * root category itself.) <p>
   *
   * Categories are looked up by their number prefix (everything up to
   * and including the last dot). If several categories share the same
   * prefix, the one occurring last in the attribute's value list is
   * the one used as parent.
   *
   * @param itemId categorical attribute to enrich with taxonomy
   * @throws MiningException cannot add taxonomy
   */
  public static void addTaxonomy(CategoricalAttribute itemId)
    throws MiningException {

    // Hierarchy for attribute itemId:
    CategoryHierarchy cah = new CategoryHierarchy();

    // Create hashtable number prefix => category:
    Hashtable<String, Category> numCat = new Hashtable<String, Category>();
    ArrayList categs = itemId.getValues();
    for (int i = 0; i < categs.size(); i++) {
      Category categ = (Category) categs.get(i);
      numCat.put(numberPrefix(categ.toString()), categ);
    }

    // Find parent in hashtable and add to category hierarchy:
    for (int i = 0; i < categs.size(); i++) {
      Category categ = (Category) categs.get(i);
      String fnum    = numberPrefix(categ.toString());

      // The root category itself gets no parent:
      if (fnum.equals("0.")) {
        continue;
      }

      // Top-level categories hang below the root; deeper ones below the
      // category whose number is one level shorter:
      Category pcateg;
      if (countDots(fnum) > 1) {
        pcateg = numCat.get(parentPrefix(fnum));
      }
      else {
        pcateg = numCat.get("0.");
      }
      System.out.println(categ + " => " + pcateg);

      // Add relationship to category hierarchy:
      // NOTE(review): pcateg is null when no category with the parent
      // number exists in the data; how addRelationship treats a null
      // parent is not visible here - confirm.
      cah.addRelationship(pcateg, categ);
    }

    // Set taxonomy name:
    cah.setName("websiteHierarchy");

    // Add taxonomy to item ID attribute:
    itemId.setTaxonomy(cah);
  }

  /**
   * Returns the number prefix of a category name, i.e. everything up to
   * and including the last dot ('2.1.3.EC' => '2.1.3.'). A name without
   * any dot yields the empty string.
   *
   * @param name string representation of a category
   * @return number prefix including the trailing dot, or empty string
   */
  private static String numberPrefix(String name) {
    // lastIndexOf returns -1 if there is no dot => substring(0, 0) == "":
    return name.substring(0, name.lastIndexOf('.') + 1);
  }

  /**
   * Counts the dots contained in the given text.
   *
   * @param text text to scan
   * @return number of '.' characters in text
   */
  private static int countDots(String text) {
    int dots = 0;
    for (int i = 0; i < text.length(); i++) {
      if (text.charAt(i) == '.') {
        dots = dots + 1;
      }
    }
    return dots;
  }

  /**
   * Returns the number prefix of the parent level ('2.1.3.' => '2.1.').
   * Must only be called for prefixes that end with a dot and contain
   * at least two dots.
   *
   * @param prefix number prefix as returned by numberPrefix
   * @return prefix cut after its second to last dot
   */
  private static String parentPrefix(String prefix) {
    // Skip the trailing dot and search backwards for the previous one:
    int previousDot = prefix.lastIndexOf('.', prefix.length() - 2);
    return prefix.substring(0, previousDot + 1);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -