📄 dbscan.java
字号:
package xmu.hll.Models.Clustering.CDBased.Algorithms.DBSCAN;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Vector;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;
import xmu.hll.Models.Clustering.CDBased.Algorithms.DBSCAN.DataPoint;
/**
 * DBSCAN-style density clustering over {@link DataPoint}s built from two
 * attribute values of the mining input stream.
 *
 * <p>Typical call order (as performed by {@link #runAlgorithm()}):
 * {@link #PrepareDBSCAN_Table()} builds, for every point, the list of all
 * points ordered by distance to it; {@link #BuildCorePoint(double, int)} tags
 * the core points; {@link #DBSCAN_Cluster()} grows one cluster per unused core
 * point and finally turns every unassigned point into a singleton cluster.
 *
 * <p>NOTE(review): the neighbour lists are ordered with {@code DBSCANSort},
 * which is not visible in this file. The code assumes it orders points by
 * ascending value of the distance most recently computed through
 * {@code DataPoint.Distance} -- confirm against that class.
 *
 * <p>The public collection fields are kept as raw {@code ArrayList}s so that
 * existing callers keep compiling unchanged.
 */
public class DBSCAN extends CDBasedClusteringAlgorithm {

    // ----- configuration supplied through the setters -------------------------
    private int eps;            // neighbourhood radius (integer-valued through the public API)
    private int minPoints;      // minimum number of neighbours required for a core point

    // ----- state of the most recent run ---------------------------------------
    private int numAtt;         // number of attributes reported by the meta data
    private int m_Core_Num;     // number of core points found by BuildCorePoint
    private int m_MinPts;       // minPts used by the last BuildCorePoint call
    private int m_Cluster_Num;  // number of clusters (including noise singletons)
    private double m_eps;       // radius used by the last BuildCorePoint call

    /** One {@code ArrayList} of {@link DataPoint}s per cluster; filled by {@link #DBSCAN_Cluster()}. */
    public ArrayList m_cluster;

    /** Members of the cluster currently (or most recently) being grown. */
    public ArrayList temp = new ArrayList(128);

    /** All input points. */
    public ArrayList dataPoints = new ArrayList(128);

    /** For the point at index i: every data point, sorted by distance to point i. */
    private ArrayList dp2dp;

    public DBSCAN() {}

    /** Removes every data point and clears the recorded clustering parameters. */
    public void RemoveAllDataPoints() {
        dataPoints.clear();
        m_Core_Num = 0;
        m_MinPts = 0;
        m_eps = 0;
    }

    /** Resets cluster id, core flag and used flag of every data point. */
    public void ResetAllDataPointsState() {
        for (int i = 0; i < dataPoints.size(); i++) {
            DataPoint dp = (DataPoint) dataPoints.get(i);
            dp.class_id = 0;      // 0 means "not assigned to any cluster"
            dp.core_tag = false;  // not a core point
            dp.used_tag = false;  // not yet consumed by a cluster
        }
    }

    /**
     * Builds the neighbour table: one list per data point that contains all
     * data points sorted by their distance to that point.
     *
     * <p>Needs memory quadratic in the number of data points.
     */
    @SuppressWarnings("unchecked")
    public void PrepareDBSCAN_Table() {
        int dp_count = dataPoints.size();
        dp2dp = new ArrayList(dp_count);
        for (int i = 0; i < dp_count; i++) {
            DataPoint origin = (DataPoint) dataPoints.get(i);
            // Distance() is called for its side effect here: it updates the
            // distance stored in each point, which the comparator then reads.
            for (int j = 0; j < dp_count; j++) {
                ((DataPoint) dataPoints.get(j)).Distance(origin);
            }
            ArrayList byDistance = new ArrayList(dataPoints);
            // Must sort before the next origin overwrites the stored distances.
            Collections.sort(byDistance, new DBSCANSort());
            dp2dp.add(byDistance);
        }
    }

    /**
     * Tags every point whose {@code minPts}-th entry in its neighbour list
     * (entry 0 is expected to be the point itself) lies within {@code eps}.
     *
     * <p>All point states are reset first. The neighbour table is (re)built
     * when it is missing or its size no longer matches {@link #dataPoints}.
     *
     * @param eps    neighbourhood radius
     * @param minPts index into the distance-sorted neighbour list that must
     *               still be within {@code eps}
     * @return the number of core points found; 0 when there are not more than
     *         {@code minPts} data points
     * @throws IllegalArgumentException if {@code minPts} is negative
     */
    public int BuildCorePoint(double eps, int minPts) {
        if (minPts < 0) {
            throw new IllegalArgumentException("minPts must not be negative: " + minPts);
        }
        if (dp2dp == null || dp2dp.size() != dataPoints.size()) {
            PrepareDBSCAN_Table();
        }
        ResetAllDataPointsState();

        int core_num = 0;
        // With minPts >= number of points no neighbour list has an entry at
        // index minPts, so no point can be a core point.
        if (minPts < dataPoints.size()) {
            for (int i = 0; i < dataPoints.size(); i++) {
                ArrayList neighbours = (ArrayList) dp2dp.get(i);
                DataPoint src_dp = (DataPoint) dataPoints.get(i);
                DataPoint des_dp = (DataPoint) neighbours.get(minPts);
                if (src_dp.Distance(des_dp) <= eps) {
                    src_dp.core_tag = true;
                    core_num++;
                }
            }
        }

        // Always record the parameters so later expansion never uses stale values.
        m_Core_Num = core_num;
        m_MinPts = minPts;
        m_eps = eps;
        return core_num;
    }

    /**
     * Groups the tagged points into clusters and stores them in
     * {@link #m_cluster}.
     *
     * <p>Every unused core point seeds a new cluster with ids starting at 1.
     * Afterwards each point still carrying {@code class_id == 0} is appended
     * as its own single-element cluster. {@code m_cluster} ends up with
     * exactly one list per cluster, with no empty padding entries.
     */
    @SuppressWarnings("unchecked")
    public void DBSCAN_Cluster() {
        m_cluster = new ArrayList(128);
        int current_class_id = 0;

        for (int i = 0; i < dataPoints.size(); i++) {
            DataPoint dp = (DataPoint) dataPoints.get(i);
            if (dp.core_tag && !dp.used_tag) {
                current_class_id++;
                dp.class_id = current_class_id;
                dp.used_tag = true;
                // A fresh list per cluster; sharing one list would make every
                // cluster alias the same members.
                temp = new ArrayList(128);
                CorePointCluster(i, current_class_id);
                // The seed goes last: runAlgorithm uses the last core member
                // of a cluster as its centre.
                temp.add(dp);
                m_cluster.add(temp);
            }
        }
        m_Cluster_Num = current_class_id;

        // Points not reached from any core point become singleton clusters.
        for (int i = 0; i < dataPoints.size(); i++) {
            DataPoint dp = (DataPoint) dataPoints.get(i);
            if (dp.class_id == 0) {
                ArrayList single = new ArrayList();
                single.add(dp);
                m_cluster.add(single);
                m_Cluster_Num++;
            }
        }
    }

    /**
     * Adds to {@link #temp} every unused point reachable from the core point
     * at {@code dp_pos} through chains of core points within {@code m_eps}.
     *
     * <p>Uses an explicit work stack instead of recursion so that large
     * clusters cannot exhaust the call stack.
     *
     * @param dp_pos        index of the seed core point in {@link #dataPoints}
     * @param core_class_id cluster id assigned to every reached point
     */
    @SuppressWarnings("unchecked")
    private void CorePointCluster(int dp_pos, int core_class_id) {
        ArrayList<Integer> pending = new ArrayList<Integer>();
        pending.add(dp_pos);

        while (!pending.isEmpty()) {
            int current = pending.remove(pending.size() - 1);
            ArrayList neighbours = (ArrayList) dp2dp.get(current);
            DataPoint src_dp = (DataPoint) dataPoints.get(current);

            for (int i = 0; i < neighbours.size(); i++) {
                DataPoint des_dp = (DataPoint) neighbours.get(i);
                if (src_dp.Distance(des_dp) > m_eps) {
                    break; // list is distance-sorted: everything further is outside eps
                }
                if (des_dp.used_tag) {
                    continue; // includes the source point itself
                }
                des_dp.class_id = core_class_id;
                des_dp.used_tag = true;
                temp.add(des_dp);
                if (des_dp.core_tag) {
                    pending.add(dataPoints.indexOf(des_dp));
                }
            }
        }
    }

    /**
     * Reads all points from the input stream, clusters them and publishes the
     * result in {@code clusters}.
     *
     * <p>{@code metaData}, {@code miningInputStream} and {@code clusters} are
     * not declared in this class; presumably they are inherited from
     * {@code CDBasedClusteringAlgorithm}.
     *
     * <p>NOTE(review): only attribute indices 1 and 2 are read and written;
     * whether index 0 is intentionally skipped cannot be told from this file.
     */
    @SuppressWarnings("unchecked")
    @Override
    protected void runAlgorithm() throws MiningException {
        numAtt = metaData.getAttributesNumber();

        ResetAllDataPointsState();
        RemoveAllDataPoints();

        while (miningInputStream.next()) {
            DataPoint dp = new DataPoint(miningInputStream.readAttributeValue(1),
                    miningInputStream.readAttributeValue(2));
            dataPoints.add(dp);
        }

        PrepareDBSCAN_Table();
        BuildCorePoint(getEps(), getMinPoints());
        DBSCAN_Cluster();

        clusters = new Cluster[m_Cluster_Num];
        for (int a = 0; a < m_Cluster_Num; a++) {
            ArrayList members = (ArrayList) m_cluster.get(a);

            // Centre: the last core point of the cluster; a singleton (noise)
            // cluster uses its only member.
            DataPoint centre = (DataPoint) members.get(0);
            for (int b = 0; b < members.size(); b++) {
                DataPoint d = (DataPoint) members.get(b);
                if (d.core_tag) {
                    centre = d;
                }
            }

            Vector contained = new Vector(members.size());
            for (int b = 0; b < members.size(); b++) {
                contained.add(toMiningVector((DataPoint) members.get(b)));
            }

            Cluster cluster = new Cluster();
            cluster.setName("clust" + a);
            cluster.setCenterVec(toMiningVector(centre));
            cluster.setContainedVectors(contained);
            clusters[a] = cluster;
        }
    }

    /** Wraps a data point into a {@code MiningVector} with d1/d2 at indices 1 and 2. */
    private MiningVector toMiningVector(DataPoint point) throws MiningException {
        double value[] = new double[numAtt];
        MiningVector vec = new MiningVector(value);
        vec.setMetaData(metaData);
        vec.setValue(1, point.d1);
        vec.setValue(2, point.d2);
        return vec;
    }

    /** @return the configured neighbourhood radius */
    public int getEps() {
        return eps;
    }

    /** @param eps neighbourhood radius to use in {@link #runAlgorithm()} */
    public void setEps(int eps) {
        this.eps = eps;
    }

    /** @return the configured minimum neighbour count */
    public int getMinPoints() {
        return minPoints;
    }

    /** @param minPoints minimum neighbour count to use in {@link #runAlgorithm()} */
    public void setMinPoints(int minPoints) {
        this.minPoints = minPoints;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -