📄 clusteringbuild.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Carsten Weisse
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Examples;
import java.io.FileWriter;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.ClusteringMiningModel;
import com.prudsys.pdm.Models.Clustering.Distance;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringSettings;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Builds a clustering model using the K-Means algorithm and writes it to
* PMML file 'ClusteringModel.xml'.
*/
public class ClusteringBuild extends BasisExample {

    /**
     * Empty constructor.
     */
    public ClusteringBuild() {
    }

    /**
     * Runs the example: reads the iris ARFF data, configures a
     * centre/distance-based clustering with a Euclidean distance, builds the
     * model with the "KMeans" algorithm specification (3 clusters), prints
     * the clusters, writes the model as PMML and finally prints the
     * predicted cluster for every input vector.
     *
     * @throws Exception error while example is running
     */
    public void runExample() throws Exception {
        // Open data source and get metadata:
        MiningInputStream inputData = new MiningArffStream( "data/arff/iris.arff" );
        MiningDataSpecification metaData = inputData.getMetaData();

        // Create MiningSettings object and assign metadata:
        CDBasedClusteringSettings miningSettings = new CDBasedClusteringSettings();
        miningSettings.setDataSpecification( metaData );

        // Assign settings:
        Distance dist = new Distance();
        dist.setType( Distance.TYPE_EUCLIDEAN );
        dist.setCompareFunction( Distance.COMPARISON_FUNCTION_ABS_DIFF );
        dist.setMeasureType( Distance.MEASURE_TYPE_DISTANCE );
        dist.setNormalized( false );
        miningSettings.setDistance( dist );
        miningSettings.verifySettings();

        // Get default mining algorithm specification from 'algorithms.xml':
        MiningAlgorithmSpecification miningAlgorithmSpecification =
            MiningAlgorithmSpecification.getMiningAlgorithmSpecification( "KMeans", null );
        if ( miningAlgorithmSpecification == null ) {
            throw new MiningException( "Can't find clustering method." );
        }

        // Get class name from algorithms specification:
        String className = miningAlgorithmSpecification.getClassname();
        if ( className == null ) {
            throw new MiningException( "classname attribute expected." );
        }

        // Set and display mining parameters:
        miningAlgorithmSpecification.setMAPValue( "numberOfClusters", "3" );
        GeneralUtils.displayMiningAlgSpecParameters( miningAlgorithmSpecification );

        // Create algorithm object with default values. The cast is kept on
        // purpose: it fails fast if the configured class is not a
        // CDBasedClusteringAlgorithm.
        MiningAlgorithm algorithm = (CDBasedClusteringAlgorithm)
            GeneralUtils.createMiningAlgorithmInstance( className );

        // Put it all together:
        algorithm.setMiningInputStream( inputData );
        algorithm.setMiningSettings( miningSettings );
        algorithm.setMiningAlgorithmSpecification( miningAlgorithmSpecification );
        algorithm.verify();

        // Build the mining model:
        MiningModel model = algorithm.buildModel();
        System.out.println("calculation time [s]: " + algorithm.getTimeSpentToBuildModel());

        // Show the clusters:
        showClusters( (ClusteringMiningModel) model );

        // Write to PMML. The writer is closed in 'finally' so the file handle
        // is released and buffered output is flushed to disk even if
        // writePmml fails, and before the browser below tries to read it.
        FileWriter writer = new FileWriter("data/pmml/ClusteringModel.xml");
        try {
            model.writePmml( writer );
        } finally {
            writer.close();
        }

        // Show in browser ('debug' is not declared in this class; presumably
        // inherited from BasisExample):
        if ( debug == 2 ) {
            PmmlUtils.openPmmlBrowser("ClusteringModel.xml");
        }

        // Show relation header:
        System.out.println("Prediction:");
        for ( int i = 0; i < metaData.getAttributesNumber(); i++ ) {
            System.out.print(metaData.getMiningAttribute(i).getName() + " ");
        }

        // Show clustering results:
        System.out.println();
        int rowNumber = 0;
        inputData.reset();   // rewind: the stream was already consumed while building the model
        while ( inputData.next() ) {
            // Make prediction:
            MiningVector vector = inputData.read();
            int predicted = (int) model.applyModelFunction( vector );

            // Output (1-based row number, vector, predicted cluster):
            System.out.println(" " + ++rowNumber +": " + vector + " -> " + predicted);
        }
    }

    /**
     * Example of building a cluster model.
     *
     * @param args arguments (ignored)
     */
    public static void main(String[] args) {
        try {
            new ClusteringBuild().runExample();
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Prints the number of clusters of the model followed by one line per
     * cluster, using each cluster's string representation.
     *
     * @param clustModel clustering model to show
     * @throws MiningException cannot show clusters
     */
    public static void showClusters(ClusteringMiningModel clustModel)
        throws MiningException {

        System.out.println("number of clusters: " + clustModel.getNumberOfClusters());
        Cluster[] clust = clustModel.getClusters();
        for ( int i = 0; i < clust.length; i++ ) {
            System.out.println("Clust["+i+"]: " + clust[i].toString() );
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -