⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dbscan.java

📁 DBSCAN的Java 数据挖掘 源代码
💻 JAVA
字号:
package xmu.hll.Models.Clustering.CDBased.Algorithms.DBSCAN;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Vector;

import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;

import xmu.hll.Models.Clustering.CDBased.Algorithms.DBSCAN.DataPoint;

/**
 * Density-based clustering (DBSCAN) over two-dimensional {@link DataPoint}s,
 * plugged into the mining framework as a {@link CDBasedClusteringAlgorithm}.
 *
 * <p>Typical sequence (see {@link #runAlgorithm()}): load the points into
 * {@link #dataPoints}, call {@link #PrepareDBSCAN_Table()}, then
 * {@link #BuildCorePoint(double, int)}, then {@link #DBSCAN_Cluster()}.
 * Points that do not end up in any density cluster are each reported as a
 * cluster of their own.
 */
public class DBSCAN extends CDBasedClusteringAlgorithm {
	/** Number of empty placeholder lists {@link #m_cluster} is pre-filled with. */
	private static final int INITIAL_CLUSTER_SLOTS = 128;

	// Parameters supplied by the caller before the algorithm runs ---------------
	private int eps;    // neighbourhood radius
	private int minPoints;    // minimum number of neighbours required for a core point
	private int numAtt;    // number of attributes reported by the meta data
	
	// State recorded by the most recent BuildCorePoint / DBSCAN_Cluster call -----
	private int m_Core_Num;    // number of core points
	private int m_MinPts;    // minPts value used to tag the core points
	private int m_Cluster_Num;    // number of clusters, including one per unassigned point
	private double m_eps;    // radius used to tag the core points
	/** One member list ({@code ArrayList} of {@link DataPoint}) per cluster, padded with empty lists. */
	public ArrayList m_cluster;
	/** Member list of the cluster that is currently being (or was most recently) built. */
	public ArrayList temp = new ArrayList(128);
	/** The input data points. */
	public ArrayList dataPoints = new ArrayList(128);
	/** Row i holds every data point, sorted by {@code DBSCANSort} relative to point i. */
	private ArrayList dp2dp;
	
	
	public DBSCAN() {}

	/** Removes every data point and clears the recorded core-point parameters. */
	public void RemoveAllDataPoints() {
		dataPoints.clear();
		m_Core_Num = 0;
		m_MinPts = 0;
		m_eps = 0;
	}

	/** Resets cluster id, core flag and used flag of every data point. */
	public void ResetAllDataPointsState() {
		for(int i = 0; i < dataPoints.size(); i++) {
			DataPoint dp = (DataPoint) dataPoints.get(i);
			dp.class_id = 0;    // 0 means "not assigned to a cluster"
			dp.core_tag = false;
			dp.used_tag = false;
		}
	}
	
	/**
	 * Builds the neighbour table {@code dp2dp}: for each data point i, a list
	 * of all data points sorted with {@code DBSCANSort}.
	 *
	 * <p>NOTE(review): according to the original author's note,
	 * {@code DataPoint.Distance} also stores its result on the receiver, and
	 * {@code DBSCANSort} presumably orders by that stored value (nearest
	 * first). Neither class is visible here, so confirm before changing the
	 * order of the statements below.
	 */
	@SuppressWarnings("unchecked")
	public void PrepareDBSCAN_Table() {
		int dp_count = dataPoints.size();
		
		dp2dp = new ArrayList(dp_count);
		
		for(int i = 0; i < dp_count; i++) {
			DataPoint dp1 = (DataPoint) dataPoints.get(i);
			ArrayList row = new ArrayList(dp_count);
			
			for(int j = 0; j < dp_count; j++) {
				DataPoint dp2 = (DataPoint) dataPoints.get(j);
				dp2.Distance(dp1);    // refresh dp2's distance relative to dp1 before sorting
				row.add(dp2);
			}
			
			// The row must be sorted before the next iteration recomputes the distances.
			Collections.sort(row, new DBSCANSort());
			dp2dp.add(row);
		}
	}

	/**
	 * Tags every data point whose {@code minPts}-th entry in its neighbour row
	 * lies within {@code eps} as a core point. All point states are reset
	 * first. {@link #PrepareDBSCAN_Table()} must have been called for the
	 * current data points.
	 *
	 * @param eps    neighbourhood radius
	 * @param minPts index into the sorted neighbour row that must still lie
	 *               within {@code eps}; must not be negative
	 * @return the number of core points found; 0 when {@code minPts} is not
	 *         smaller than the number of data points
	 * @throws IllegalArgumentException if {@code minPts} is negative
	 */
	public int BuildCorePoint(double eps, int minPts) {
		if(minPts < 0) {
			throw new IllegalArgumentException("minPts must not be negative: " + minPts);
		}
		
		ResetAllDataPointsState();
		
		int core_num = 0;
		
		// Every neighbour row has one entry per data point, so with too few
		// points no row has an entry at index minPts and nothing can be core.
		if(minPts < dataPoints.size()) {
			for(int i = 0; i < dataPoints.size(); i++) {
				ArrayList row = (ArrayList) dp2dp.get(i);
				DataPoint des_dp = (DataPoint) row.get(minPts);
				DataPoint src_dp = (DataPoint) dataPoints.get(i);
				
				if(src_dp.Distance(des_dp) <= eps) {
					src_dp.core_tag = true;
					core_num++;
				}
			}
		}
		
		// Always record the parameters so that a run without core points does
		// not leave values from an earlier run behind.
		m_Core_Num = core_num;
		m_MinPts = minPts;
		m_eps = eps;
		
		return core_num;
	}

	/**
	 * Groups the data points into clusters and stores the member lists in
	 * {@link #m_cluster}. Requires the core tags set by
	 * {@link #BuildCorePoint(double, int)}.
	 *
	 * <p>Each unused core point seeds a new cluster with its own member list;
	 * the seed itself is appended after the points reached from it. Afterwards
	 * every point that is still unassigned ({@code class_id == 0}) is stored
	 * as a single-element cluster.
	 */
	@SuppressWarnings("unchecked")
	public void DBSCAN_Cluster() {
		m_cluster = new ArrayList(INITIAL_CLUSTER_SLOTS);
		
		// Kept for compatibility: m_cluster has always exposed at least this
		// many (possibly empty) lists.
		for(int i = 0; i < INITIAL_CLUSTER_SLOTS; i++) {
			m_cluster.add(new ArrayList());
		}
		
		int current_class_id = 0;
		
		for(int i = 0; i < dataPoints.size(); i++) {
			DataPoint dp = (DataPoint) dataPoints.get(i);
			
			if(dp.core_tag && !dp.used_tag) {
				// A fresh list per cluster; sharing one list made every
				// cluster contain the members of all clusters.
				temp = new ArrayList(128);
				
				dp.class_id = current_class_id + 1;
				dp.used_tag = true;
				CorePointCluster(i, current_class_id + 1);
				temp.add(dp);
				storeCluster(current_class_id, temp);
				current_class_id++;
			}
		}
		
		m_Cluster_Num = current_class_id;
		
		for(int i = 0; i < dataPoints.size(); i++) {
			DataPoint dp = (DataPoint) dataPoints.get(i);
			
			if(dp.class_id == 0) {
				ArrayList single = new ArrayList();
				single.add(dp);
				storeCluster(m_Cluster_Num, single);
				m_Cluster_Num++;
			}
		}
	}

	/**
	 * Stores {@code members} at position {@code index} of {@link #m_cluster},
	 * growing the list when the pre-filled slots are used up.
	 */
	@SuppressWarnings("unchecked")
	private void storeCluster(int index, ArrayList members) {
		if(index < m_cluster.size()) {
			m_cluster.set(index, members);
		}
		else {
			m_cluster.add(members);
		}
	}

	/**
	 * Adds every not yet used point that can be reached from the core point at
	 * {@code dp_pos} to {@link #temp} and labels it with {@code core_class_id}.
	 *
	 * <p>Uses an explicit work list instead of recursion so that long chains
	 * of core points cannot exhaust the call stack.
	 *
	 * @param dp_pos        index of the seed core point in {@link #dataPoints}
	 * @param core_class_id cluster id to assign to the reached points
	 */
	@SuppressWarnings("unchecked")
	private void CorePointCluster(int dp_pos, int core_class_id) {
		// Positions of core points whose neighbour row still has to be scanned.
		ArrayList pending = new ArrayList();
		pending.add(Integer.valueOf(dp_pos));
		
		while(!pending.isEmpty()) {
			int pos = ((Integer) pending.remove(pending.size() - 1)).intValue();
			ArrayList row = (ArrayList) dp2dp.get(pos);
			DataPoint src_dp = (DataPoint) row.get(0);
			
			// Index 0 is skipped, as in the original scan. The bounded loop
			// also covers a single-point row and an eps larger than every
			// distance, without relying on an exception.
			for(int i = 1; i < row.size(); i++) {
				DataPoint des_dp = (DataPoint) row.get(i);
				
				// Stop at the first entry outside the radius.
				if(!(src_dp.Distance(des_dp) <= m_eps)) {
					break;
				}
				
				if(!des_dp.used_tag) {
					des_dp.class_id = core_class_id;
					des_dp.used_tag = true;
					temp.add(des_dp);
					
					if(des_dp.core_tag) {
						pending.add(Integer.valueOf(dataPoints.indexOf(des_dp)));
					}
				}
			}
		}
	}
	
	/**
	 * Reads all points from the mining input stream, clusters them and fills
	 * {@code clusters} with one {@link Cluster} per result list, each with a
	 * name, a center vector and its contained vectors.
	 *
	 * <p>NOTE(review): only the attribute values at indices 1 and 2 are read
	 * and written; confirm that this matches the input schema.
	 *
	 * @throws MiningException declared by the overridden method
	 */
	@SuppressWarnings("unchecked")
	@Override
	protected void runAlgorithm() throws MiningException {
		numAtt = metaData.getAttributesNumber();
		
		// Discard the points of any previous run.
		RemoveAllDataPoints();
		
		while (miningInputStream.next()) {
			DataPoint dp = new DataPoint(miningInputStream.readAttributeValue(1), miningInputStream.readAttributeValue(2));
			dataPoints.add(dp);
		}
		
		PrepareDBSCAN_Table();
		BuildCorePoint(getEps(), getMinPoints());
		DBSCAN_Cluster();
		
		clusters = new Cluster[m_Cluster_Num];
		
		for(int a = 0; a < m_Cluster_Num; a++) {
			ArrayList members = (ArrayList) m_cluster.get(a);
			
			clusters[a] = new Cluster();
			clusters[a].setName("clust" + a);
			clusters[a].setCenterVec(toMiningVector(chooseCenter(members)));
			
			Vector contained = new Vector();
			
			for(int b = 0; b < members.size(); b++) {
				contained.add(toMiningVector((DataPoint) members.get(b)));
			}
			
			clusters[a].setContainedVectors(contained);
		}
	}

	/**
	 * Picks the point reported as cluster center: the last core point in
	 * {@code members}, or the first member when none is a core point (the
	 * single-element clusters built from unassigned points).
	 */
	private DataPoint chooseCenter(ArrayList members) {
		DataPoint center = (DataPoint) members.get(0);
		
		for(int b = 0; b < members.size(); b++) {
			DataPoint d = (DataPoint) members.get(b);
			
			if(d.core_tag) {
				center = d;
			}
		}
		
		return center;
	}

	/** Wraps the two coordinates of {@code d} in a {@link MiningVector} bound to the current meta data. */
	private MiningVector toMiningVector(DataPoint d) throws MiningException {
		MiningVector vec = new MiningVector(new double[numAtt]);
		vec.setMetaData(metaData);
		vec.setValue(1, d.d1);
		vec.setValue(2, d.d2);
		return vec;
	}

	/** @return the neighbourhood radius */
	public int getEps() {
		return eps;
	}

	/** @param eps the neighbourhood radius */
	public void setEps(int eps) {
		this.eps = eps;
	}

	/** @return the minimum number of neighbours required for a core point */
	public int getMinPoints() {
		return minPoints;
	}

	/** @param minPoints the minimum number of neighbours required for a core point */
	public void setMinPoints(int minPoints) {
		this.minPoints = minPoints;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -