⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 optics.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Copyright (C) 2004 *    & Matthias Schubert (schubert@dbs.ifi.lmu.de) *    & Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de) *    & Rainer Holzmann (holzmann@cip.ifi.lmu.de)  */package weka.clusterers;import weka.clusterers.forOPTICSAndDBScan.DataObjects.DataObject;import weka.clusterers.forOPTICSAndDBScan.Databases.Database;import weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer;import weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.SERObject;import weka.clusterers.forOPTICSAndDBScan.Utils.EpsilonRange_ListElement;import weka.clusterers.forOPTICSAndDBScan.Utils.UpdateQueue;import weka.clusterers.forOPTICSAndDBScan.Utils.UpdateQueueElement;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.filters.Filter;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import java.io.BufferedWriter;import java.io.FileWriter;import java.lang.reflect.Constructor;import java.lang.reflect.InvocationTargetException;import java.text.DecimalFormat;import java.util.Calendar;import java.util.Enumeration;import java.util.GregorianCalendar;import java.util.Iterator;import java.util.List;import java.util.Vector;/** <!-- globalinfo-start --> * Mihael Ankerst, Markus M. Breunig, Hans-Peter Kriegel, Joerg Sander: OPTICS: Ordering Points To Identify the Clustering Structure. In: ACM SIGMOD International Conference on Management of Data, 49-60, 1999. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Ankerst1999, *    author = {Mihael Ankerst and Markus M. Breunig and Hans-Peter Kriegel and Joerg Sander}, *    booktitle = {ACM SIGMOD International Conference on Management of Data}, *    pages = {49-60}, *    publisher = {ACM Press}, *    title = {OPTICS: Ordering Points To Identify the Clustering Structure}, *    year = {1999} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -E &lt;double&gt; *  epsilon (default = 0.9)</pre> *  * <pre> -M &lt;int&gt; *  minPoints (default = 6)</pre> *  * <pre> -I &lt;String&gt; *  index (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)</pre> *  * <pre> -D &lt;String&gt; *  distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)</pre> *  * <pre> -F *  write results to OPTICS_#TimeStamp#.TXT - File</pre> *  <!-- options-end --> * * @author Matthias Schubert (schubert@dbs.ifi.lmu.de) * @author Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de) * @author Rainer Holzmann (holzmann@cip.ifi.lmu.de) * @version $Revision: 1.6 $ */public class OPTICS     extends Clusterer     implements OptionHandler, TechnicalInformationHandler {    /** for serialization */    static final long serialVersionUID = 274552680222105221L;      /**     * Specifies the radius for a range-query     */    private double epsilon = 0.9;    /**     * Specifies the density (the range-query must contain at least minPoints DataObjects)     */    private int minPoints = 6;    /**     * Replace missing values in training instances     */    private ReplaceMissingValues replaceMissingValues_Filter;    /**     * Holds the number of clusters generated     */    private int numberOfGeneratedClusters;    /**     * Holds the distance-type that is used     * (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)     */    private String database_distanceType = "weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject";    /**     * Holds the type of the used database     * (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)     */    private String database_Type = "weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase";    /**     * The database that is used for OPTICS     */    private Database database;    /**     * Holds the time-value (seconds) for the duration of the clustering-process     */    private double elapsedTime;    /**     * Flag that indicates if the results are written to a file or not     */    private boolean writeOPTICSresults = false;    /**     * Holds the ClusterOrder (dataObjects with their r_dist and c_dist) for the GUI     */    private FastVector resultVector;    // *****************************************************************************************************************    // constructors    // *****************************************************************************************************************    // *****************************************************************************************************************    // methods    // *****************************************************************************************************************    /**     * Returns default capabilities of the clusterer.     *     * @return      the capabilities of this clusterer     */    public Capabilities getCapabilities() {      Capabilities result = super.getCapabilities();      // attributes      result.enable(Capability.NOMINAL_ATTRIBUTES);      result.enable(Capability.NUMERIC_ATTRIBUTES);      result.enable(Capability.DATE_ATTRIBUTES);      result.enable(Capability.MISSING_VALUES);      return result;    }    /**     * Generate Clustering via OPTICS     * @param instances The instances that need to be clustered     * @throws java.lang.Exception If clustering was not successful     */    public void buildClusterer(Instances instances) throws Exception {        // can clusterer handle the data?        getCapabilities().testWithFail(instances);        resultVector = new FastVector();        long time_1 = System.currentTimeMillis();        numberOfGeneratedClusters = 0;        replaceMissingValues_Filter = new ReplaceMissingValues();        replaceMissingValues_Filter.setInputFormat(instances);        Instances filteredInstances = Filter.useFilter(instances, replaceMissingValues_Filter);        database = databaseForName(getDatabase_Type(), filteredInstances);        for (int i = 0; i < database.getInstances().numInstances(); i++) {            DataObject dataObject = dataObjectForName(getDatabase_distanceType(),                    database.getInstances().instance(i),                    Integer.toString(i),                    database);            database.insert(dataObject);        }        database.setMinMaxValues();        UpdateQueue seeds = new UpdateQueue();        /** OPTICS-Begin */        Iterator iterator = database.dataObjectIterator();        while (iterator.hasNext()) {            DataObject dataObject = (DataObject) iterator.next();            if (!dataObject.isProcessed()) {                expandClusterOrder(dataObject, seeds);            }        }        long time_2 = System.currentTimeMillis();        elapsedTime = (double) (time_2 - time_1) / 1000.0;        if (writeOPTICSresults) {            String fileName = "";            GregorianCalendar gregorianCalendar = new GregorianCalendar();            String timeStamp = gregorianCalendar.get(Calendar.DAY_OF_MONTH) + "-" +                    (gregorianCalendar.get(Calendar.MONTH) + 1) +                    "-" + gregorianCalendar.get(Calendar.YEAR) +                    "--" + gregorianCalendar.get(Calendar.HOUR_OF_DAY) +                    "-" + gregorianCalendar.get(Calendar.MINUTE) +                    "-" + gregorianCalendar.get(Calendar.SECOND);            fileName = "OPTICS_" + timeStamp + ".TXT";            FileWriter fileWriter = new FileWriter(fileName);            BufferedWriter bufferedOPTICSWriter = new BufferedWriter(fileWriter);            for (int i = 0; i < resultVector.size(); i++) {                bufferedOPTICSWriter.write(format_dataObject((DataObject) resultVector.elementAt(i)));            }            bufferedOPTICSWriter.flush();            bufferedOPTICSWriter.close();        }        new OPTICS_Visualizer(getSERObject(), "OPTICS Visualizer - Main Window");    }    /**     * Expands the ClusterOrder for this dataObject     * @param dataObject Start-DataObject     * @param seeds SeedList that stores dataObjects with reachability-distances     */    private void expandClusterOrder(DataObject dataObject, UpdateQueue seeds) {        List list = database.coreDistance(getMinPoints(), getEpsilon(), dataObject);        List epsilonRange_List = (List) list.get(1);        dataObject.setReachabilityDistance(DataObject.UNDEFINED);        dataObject.setCoreDistance(((Double) list.get(2)).doubleValue());        dataObject.setProcessed(true);        resultVector.addElement(dataObject);        if (dataObject.getCoreDistance() != DataObject.UNDEFINED) {            update(seeds, epsilonRange_List, dataObject);            while (seeds.hasNext()) {                UpdateQueueElement updateQueueElement = seeds.next();                DataObject currentDataObject = (DataObject) updateQueueElement.getObject();                currentDataObject.setReachabilityDistance(updateQueueElement.getPriority());                List list_1 = database.coreDistance(getMinPoints(), getEpsilon(), currentDataObject);                List epsilonRange_List_1 = (List) list_1.get(1);                currentDataObject.setCoreDistance(((Double) list_1.get(2)).doubleValue());                currentDataObject.setProcessed(true);                resultVector.addElement(currentDataObject);                if (currentDataObject.getCoreDistance() != DataObject.UNDEFINED) {                    update(seeds, epsilonRange_List_1, currentDataObject);                }            }        }    }    /**     * Wraps the dataObject into a String, that contains the dataObject's key, the dataObject itself,     * the coreDistance and its reachabilityDistance in a formatted manner.     * @param dataObject The dataObject that is wrapped into a formatted string.     * @return String Formatted string     */    private String format_dataObject(DataObject dataObject) {        StringBuffer stringBuffer = new StringBuffer();        stringBuffer.append("(" + Utils.doubleToString(Double.parseDouble(dataObject.getKey()),                (Integer.toString(database.size()).length()), 0) + ".) "                + Utils.padRight(dataObject.toString(), 40) + "  -->  c_dist: " +                ((dataObject.getCoreDistance() == DataObject.UNDEFINED) ?                Utils.padRight("UNDEFINED", 12) :                Utils.padRight(Utils.doubleToString(dataObject.getCoreDistance(), 2, 3), 12)) +                " r_dist: " +                ((dataObject.getReachabilityDistance() == DataObject.UNDEFINED) ?                Utils.padRight("UNDEFINED", 12) :                Utils.doubleToString(dataObject.getReachabilityDistance(), 2, 3)) + "\n");        return stringBuffer.toString();    }    /**     * Updates reachability-distances in the Seeds-List     * @param seeds UpdateQueue that holds DataObjects with their corresponding reachability-distances     * @param epsilonRange_list List of DataObjects that were found in epsilon-range of centralObject     * @param centralObject     */    private void update(UpdateQueue seeds, List epsilonRange_list, DataObject centralObject) {        double coreDistance = centralObject.getCoreDistance();        double new_r_dist = DataObject.UNDEFINED;        for (int i = 0; i < epsilonRange_list.size(); i++) {            EpsilonRange_ListElement listElement = (EpsilonRange_ListElement) epsilonRange_list.get(i);            DataObject neighbourhood_object = listElement.getDataObject();            if (!neighbourhood_object.isProcessed()) {                new_r_dist = Math.max(coreDistance, listElement.getDistance());                seeds.add(new_r_dist, neighbourhood_object, neighbourhood_object.getKey());            }        }    }    /**     * Classifies a given instance.     *     * @param instance The instance to be assigned to a cluster     * @return int The number of the assigned cluster as an integer     * @throws java.lang.Exception If instance could not be clustered     * successfully     */    public int clusterInstance(Instance instance) throws Exception {        throw new Exception();    }    /**     * Returns the number of clusters.     *     * @return int The number of clusters generated for a training dataset.     * @throws java.lang.Exception If number of clusters could not be returned     * successfully     */    public int numberOfClusters() throws Exception {        return numberOfGeneratedClusters;    }    /**     * Returns an enumeration of all the available options.     *     * @return Enumeration An enumeration of all available options.     */    public Enumeration listOptions() {        Vector vector = new Vector();        vector.addElement(                new Option("\tepsilon (default = 0.9)",                        "E",                        1,                        "-E <double>"));        vector.addElement(                new Option("\tminPoints (default = 6)",                        "M",                        1,                        "-M <int>"));        vector.addElement(                new Option("\tindex (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)",                        "I",                        1,                        "-I <String>"));        vector.addElement(

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -