⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 basickmean.java

📁 dragontoolkit用于机器学习
💻 JAVA
字号:
package dragon.ir.clustering;

import dragon.ir.clustering.clustermodel.*;
import dragon.ir.index.*;
import java.util.Random;

/**
 * <p>Basic KMeans clustering</p>
 * <p>Documents are repeatedly re-assigned to the cluster that the supplied {@link ClusterModel}
 * reports as closest, until no document changes cluster or the maximum number of iterations
 * has been reached.</p>
 * <p>Copyright: Copyright (c) 2005</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class BasicKMean extends AbstractClustering{
    /** Two distances that differ by less than this amount are treated as a tie. */
    private static final double DISTANCE_TOLERANCE=0.00001;

    /** Model used both to measure document-to-cluster distance and to hold the current cluster state. */
    protected ClusterModel distMetric;
    /** True when the initial clusters were supplied by the caller, so {@link #initialize} is skipped. */
    protected boolean initialized;
    /** Upper bound on the number of re-assignment passes performed by {@link #cluster}. */
    protected int maxIteration;
    /** Whether every document (rather than one per cluster) is randomly placed during initialization. */
    private boolean initAllObjs;

    /**
     * Creates a clusterer that seeds each cluster with a single randomly chosen document.
     * @param indexReader the index reader passed to the super class
     * @param distMetric the model used to compute document-to-cluster distances
     * @param clusterNum the number of clusters to produce
     */
    public BasicKMean(IndexReader indexReader,ClusterModel distMetric, int clusterNum){
        this(indexReader,distMetric,clusterNum,false);
    }

    /**
     * Creates a clusterer with random initialization. The maximum number of iterations defaults to 200
     * and the random seed to 0 (a seed is only applied when it is greater than zero).
     * @param indexReader the index reader passed to the super class
     * @param distMetric the model used to compute document-to-cluster distances
     * @param clusterNum the number of clusters to produce
     * @param initAllObjs whether all documents are randomly assigned to clusters during initialization
     */
    public BasicKMean(IndexReader indexReader,ClusterModel distMetric, int clusterNum, boolean initAllObjs) {
        super(indexReader);
        this.clusterNum =clusterNum;
        this.distMetric =distMetric;
        this.initialized =false;
        this.maxIteration =200;
        this.randomSeed=0;
        this.initAllObjs =initAllObjs;
    }

    /**
     * Creates a clusterer that starts from caller-supplied clusters instead of a random initialization.
     * The maximum number of iterations defaults to 200.
     * @param indexReader the index reader passed to the super class
     * @param distMetric the model used to compute document-to-cluster distances
     * @param initClusterSet the initial clusters; its cluster number becomes the number of clusters
     */
    public BasicKMean(IndexReader indexReader,ClusterModel distMetric, DocClusterSet initClusterSet) {
        super(indexReader);
        this.clusterNum =initClusterSet.getClusterNum();
        this.distMetric =distMetric;
        this.clusterSet=initClusterSet;
        distMetric.setDocClusters(initClusterSet);
        this.initialized =true;
        this.maxIteration =200;
    }

    /**
     * If this option is true, the algorithm randomly assigns all objects to the given number of clusters during initialization.
     * Otherwise, it picks only one object for each cluster during initialization.
     * @param option whether to use all objects for initialization
     */
    public void setUseAllObjectForInitialization(boolean option){
        this.initAllObjs =option;
    }

    /**
     * @return whether all objects are randomly assigned to clusters during initialization
     */
    public boolean getUseAllObjectForInitialization(){
        return initAllObjs;
    }

    /**
     * Builds the initial clusters: every document is first marked as unassigned (category -1), then one
     * randomly chosen document is placed in each cluster and, if the use-all-objects option is on, the
     * remaining documents are spread randomly over the clusters.
     * @param arrDoc the documents to cluster
     * @return false if there are fewer documents than clusters (one distinct seed document per cluster
     * is impossible in that case); true otherwise
     */
    protected boolean initialize(IRDoc[] arrDoc){
        Random random;
        int i, curDocNo;

        // Without this check the seeding loop below could never find enough unassigned documents and
        // would spin forever (or index into an empty array).
        if(clusterNum>arrDoc.length)
            return false;

        random=new Random();
        if(randomSeed>0)
            random.setSeed(randomSeed);
        clusterSet=new DocClusterSet(clusterNum);
        for(i=0;i<arrDoc.length;i++)
            arrDoc[i].setCategory(-1);

        i=0;
        //randomly assign one object to each cluster
        while(i<clusterNum){
            curDocNo=(int)(random.nextDouble()*arrDoc.length);
            // NOTE(review): this test presumably relies on clusterSet.addDoc updating the document's
            // category, so that an already chosen document is not picked twice -- confirm in DocClusterSet.
            if(arrDoc[curDocNo].getCategory()==-1){
                clusterSet.addDoc(i,arrDoc[curDocNo]);
                i++;
            }
        }
        // randomly assign remaining objects to given clusters if initAllObjs option is true.
        for(i=0;initAllObjs && i<arrDoc.length;i++){
            if(arrDoc[i].getCategory()!=-1)
                continue;
            clusterSet.addDoc(random.nextInt(clusterNum),arrDoc[i]);
        }
        distMetric.setDocClusters(clusterSet);
        return true;
    }

    /**
     * Runs the clustering. Each pass moves every document to its closest cluster (ties are broken
     * randomly) and then hands the updated clusters back to the distance model. The loop stops as soon
     * as a pass moves no document, or after {@link #getMaxIteration()} passes.
     * @param arrDoc the documents to cluster
     * @return false if the initialization failed; true otherwise
     */
    public boolean cluster(IRDoc[] arrDoc){
        Random random;
        int[] arrCanCluster;
        int movingObj, docNum, curCluster, oldCluster, iteration;
        int i;

        //initialization
        if(featureFilter!=null){
            featureFilter.initialize(indexReader,arrDoc);
            distMetric.setFeatureFilter(featureFilter);
        }
        random=new Random();
        if(randomSeed>0)
            random.setSeed(randomSeed);
        arrCanCluster=new int[clusterNum];
        docNum=arrDoc.length;
        movingObj =docNum;
        iteration = 0;
        if(!initialized && !initialize(arrDoc))
           return false;

        //loop
        while(movingObj>0 && iteration<maxIteration){
            if(showProgress){
                System.out.print((new java.util.Date()).toString()+" "+iteration);
                System.out.print(" ");
                System.out.println(movingObj);
            }
            // Count the pass regardless of whether progress is printed; otherwise the
            // maxIteration limit would only be enforced when showProgress is on.
            iteration++;

            movingObj=0;
            //assign objects to new cluster
            for (i = 0; i < docNum; i++) {
                curCluster=pickClosestCluster(arrDoc[i],arrCanCluster,random);
                // No cluster produced a usable distance: leave the document where it is.
                if(curCluster<0)
                    continue;

                oldCluster=arrDoc[i].getCategory();
                if(curCluster!=oldCluster)
                //if the old cluster contains only one document, don't remove the document from the old cluster
                //if(curCluster!=oldCluster && (oldCluster<0 || clusterSet.getDocCluster(oldCluster).getDocNum()>=2))
                {
                    // A negative category marks a document that has not been assigned yet,
                    // so there is no cluster to remove it from.
                    if(oldCluster>=0)
                        clusterSet.removeDoc(oldCluster,arrDoc[i]);
                    clusterSet.addDoc(curCluster,arrDoc[i]);
                    movingObj++;
                }
            }

            //re-compute the centroids
            if(movingObj==0) break;
            distMetric.setDocClusters(clusterSet);
        }
        return true;
    }

    /**
     * Finds the cluster closest to the given document. Clusters whose distance lies within
     * {@link #DISTANCE_TOLERANCE} of the minimum are collected as candidates and one of them is
     * chosen at random.
     * @param doc the document to place
     * @param arrCanCluster scratch buffer with room for one entry per cluster; overwritten on each call
     * @param random the generator used to break ties
     * @return the index of the chosen cluster, or -1 if no cluster yielded a comparable distance
     * (for example when every distance is NaN or infinite)
     */
    private int pickClosestCluster(IRDoc doc, int[] arrCanCluster, Random random){
        double curDist, minDist;
        int candidateNum, pick, j;

        minDist=Double.MAX_VALUE;
        candidateNum=0;
        for (j = 0; j < clusterNum; j++) {
            curDist = distMetric.getDistance(doc,j);
            if (curDist<=minDist-DISTANCE_TOLERANCE) {
                // clearly closer than everything seen so far: restart the candidate list
                minDist = curDist;
                arrCanCluster[0]=j;
                candidateNum = 1;
            }
            else if(Math.abs(curDist-minDist)<DISTANCE_TOLERANCE){
                // tie within the tolerance: remember this cluster as an additional candidate
                if(curDist<minDist)
                    minDist=curDist;
                arrCanCluster[candidateNum]=j;
                candidateNum++;
            }
        }

        if(candidateNum==0)
            return -1;
        if(candidateNum==1)
            return arrCanCluster[0];
        // nextDouble (rather than nextInt) is kept so that seeded runs draw the same random sequence as before
        pick=(int)(random.nextDouble()*candidateNum);
        if(pick>=candidateNum)
            pick=candidateNum-1;
        return arrCanCluster[pick];
    }

    /**
     * @return the maximum number of re-assignment passes performed by {@link #cluster}
     */
    public int getMaxIteration(){
        return maxIteration;
    }

    /**
     * @param iteration the maximum number of re-assignment passes performed by {@link #cluster}
     */
    public void setMaxIteration(int iteration){
        this.maxIteration =iteration;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -