⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hierarchicalclusteringalgorithm.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.1
 */

package com.prudsys.pdm.Models.Clustering.Hierarchical;

import java.util.Date;

import com.prudsys.pdm.Core.ApplicationAttribute;
import com.prudsys.pdm.Core.AttributeType;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.MiningSettings;
import com.prudsys.pdm.Input.MiningArrayStream;
import com.prudsys.pdm.Models.Clustering.ClusteringAlgorithm;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Transform.Special.ReplaceMissingValueStream;
import com.prudsys.pdm.Transform.Special.TreatOutlierValueStream;

/**
 * Common base class of the hierarchical clustering algorithms.
 *
 * It supplies the settings factory and the driver that builds the
 * hierarchical clustering mining model. The clustering step itself is
 * delegated to runAlgorithm(), which is not defined in this class.
 */
public abstract class HierarchicalClusteringAlgorithm extends ClusteringAlgorithm
{
    // -----------------------------------------------------------------------
    //  Constructor
    // -----------------------------------------------------------------------
    /**
     * Default constructor; performs no initialisation of its own.
     */
    public HierarchicalClusteringAlgorithm()
    {
        super();
    }

    // -----------------------------------------------------------------------
    //  Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Factory for the settings object this algorithm works with. The created
     * instance is not attached to the algorithm here; that is done separately
     * through the setMiningSettings method.
     *
     * @return a fresh HierarchicalClusteringSettings instance
     */
    public MiningSettings createMiningSettings()
    {
        MiningSettings settings = new HierarchicalClusteringSettings();
        return settings;
    }

    // -----------------------------------------------------------------------
    //  Run hierarchical clustering algorithm and build mining model
    // -----------------------------------------------------------------------
    /**
     * Runs the clustering algorithm and assembles the resulting hierarchical
     * clustering mining model.
     *
     * Steps, in order:
     * <ol>
     *   <li>create the outlier treatment transformation step on the current
     *       input stream;</li>
     *   <li>replace the input stream by a MiningArrayStream built from the
     *       missing-value-replaced version of the current input stream;</li>
     *   <li>call runAlgorithm();</li>
     *   <li>create the model, attach settings, input specification and the
     *       two transformation steps;</li>
     *   <li>record outlier and missing value treatment on every numerical and
     *       categorical application attribute (numerical attributes are
     *       labelled "as mean", categorical ones "as mode");</li>
     *   <li>store clusters and distance in the model, remember the model and
     *       the elapsed build time in seconds.</li>
     * </ol>
     *
     * Side effects: the fields miningInputStream, miningModel and
     * timeSpentToBuildModel are overwritten, and the attributes of
     * applicationInputSpecification are modified in place.
     *
     * @return hierarchical clustering mining model created
     * @exception MiningException cannot build cluster model
     */
    public MiningModel buildModel() throws MiningException
    {
        final long startMillis = System.currentTimeMillis();

        // Outlier treatment. The outlier method is not overridden here, so
        // whatever TreatOutlierValueStream uses on its own applies.
        // NOTE(review): only the transformation step and the bounds of this
        // stream are used below; its output is not what runAlgorithm() reads.
        // Confirm that this is intended.
        TreatOutlierValueStream outlierStream =
            new TreatOutlierValueStream( miningInputStream );
        outlierStream.createTreatOutlierValueTransformationStep();

        // Missing value replacement; from here on the algorithm reads the
        // array stream created from the replaced data.
        ReplaceMissingValueStream missingStream =
            new ReplaceMissingValueStream( miningInputStream );
        miningInputStream =
            new MiningArrayStream( missingStream.createReplaceMissingValueStream() );

        // Actual clustering:
        runAlgorithm();

        // Model skeleton:
        HierarchicalClusteringMiningModel clusterModel =
            new HierarchicalClusteringMiningModel();
        clusterModel.setMiningSettings( miningSettings );
        clusterModel.setInputSpec( applicationInputSpecification );

        // Inner transformation: outlier step first, then missing value step.
        MiningTransformationActivity transformation = new MiningTransformationActivity();
        transformation.addTransformationStep( outlierStream.getMts() );
        transformation.addTransformationStep( missingStream.getMts() );
        clusterModel.setMiningTransform( transformation );

        // Values needed to describe the treatments per application attribute.
        // Assumes the three arrays and metaData use the same index order as
        // the application attributes -- TODO confirm.
        ApplicationAttribute[] attributes =
            applicationInputSpecification.getInputAttribute();
        double[] lowerBounds  = outlierStream.getLowValues();
        double[] upperBounds  = outlierStream.getHighValues();
        double[] replacements = missingStream.getRepValues();

        for (int idx = 0; idx < attributes.length; idx++)
        {
            ApplicationAttribute attribute = attributes[idx];

            // Numerical attribute: outlier method (plus bounds when outliers
            // are mapped to extreme values) and replacement value.
            if (attribute.getAttributeType().getType() == AttributeType.NUMERICAL)
            {
                attribute.setOutliers( outlierStream.getNumOutliers() );

                boolean asExtremeValues = attribute.getOutliers().equals(
                    ApplicationAttribute.OUTLIER_TREATMENT_METHOD_asExtremeValues );
                if (asExtremeValues)
                {
                    attribute.setLowValue( Double.toString( lowerBounds[idx] ) );
                    attribute.setHighValue( Double.toString( upperBounds[idx] ) );
                }

                attribute.setMissingValueTreatment(
                    ApplicationAttribute.MISSING_VALUE_TREATMENT_METHOD_asMean );
                attribute.setMissingValueReplacement( Double.toString( replacements[idx] ) );
            }

            // Categorical attribute: outlier method and the display value of
            // the category addressed by the replacement value.
            if (attribute.getAttributeType().getType() == AttributeType.CATEGORICAL)
            {
                attribute.setOutliers( outlierStream.getCatOutliers() );

                attribute.setMissingValueTreatment(
                    ApplicationAttribute.MISSING_VALUE_TREATMENT_METHOD_asMode );

                CategoricalAttribute categorical =
                    (CategoricalAttribute) metaData.getMiningAttribute( idx );
                attribute.setMissingValueReplacement(
                    categorical.getCategory( replacements[idx] ).getDisplayValue() );
            }
        }

        // Clustering result and distance type:
        clusterModel.setClusters( getClusters() );
        clusterModel.setDistance( distance );

        // Remember the model on the algorithm:
        this.miningModel = clusterModel;

        // Elapsed wall-clock time in seconds:
        final long endMillis = System.currentTimeMillis();
        timeSpentToBuildModel = ( endMillis - startMillis ) / 1000.0;

        return clusterModel;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -