📄 dbscan_file.bas
字号:
Attribute VB_Name = "Module1"
' **********************************************************************
' DBSCAN_File.vbp
' 01.03.2005
' Adriano Moreira, Maribel Y. Santos e Sofia Carneiro
' **********************************************************************
Option Explicit
Option Base 1
Type InfArray
cod As Integer
Point As String
X As Double
Y As Double
PointsDDR As New Collection
PointType As String
ClusterID As Integer
KNearestDist As Double
End Type
Type ClusterArrayInf
Clusters As New Collection
State As Integer
End Type
Type KnearestInf
Point As Integer
Dist As Double
End Type
Public MyConnect As New Connection
Public AverageDist As Double
Public ClusterID As Integer
Public DataSet() As InfArray
Public ClusterArray() As ClusterArrayInf
Public Const K As Integer = 7
Public Const Path As String = "C:\...\DataSet1.txt"
Public Sub Main()
Dim MinPts As Integer, i As Integer, j As Integer
Dim Point As String
Dim Eps As Double
Dim RecSet As DAO.Recordset
Eps = 0.007
MinPts = 4
ClusterID = 0
Call LoadDataSet
Call CalcKNearest
For i = 1 To UBound(DataSet())
AverageDist = AverageDist + DataSet(i).KNearestDist
Next i
AverageDist = AverageDist / UBound(DataSet())
Call CheckDDRPoints(Eps, MinPts, RecSet)
For i = 1 To UBound(DataSet())
If DataSet(i).PointType = "Border" And DataSet(i).ClusterID = 0 Then
DataSet(i).ClusterID = -1
ElseIf DataSet(i).ClusterID = 0 Then
ClusterID = ClusterID + 1
Call FindClusters(i, AverageDist, MinPts)
End If
Next i
ReDim ClusterArray(0 To ClusterID)
For j = 1 To UBound(DataSet())
If DataSet(j).ClusterID = -1 Then
ClusterArray(0).Clusters.Add j
ClusterArray(0).State = 0
Else
ClusterArray(DataSet(j).ClusterID).Clusters.Add j
ClusterArray(DataSet(j).ClusterID).State = 0
End If
Next j
Call VerifyClusters(Eps, AverageDist)
Call StoreResults(RecSet)
MsgBox "Clustering finished!"
End Sub
Public Sub LoadDataSet()
' Load the data to cluster into the DataSet() array
Dim DataFile
Dim Coord_X As Double, Coord_Y As Double
Dim Size As Integer, cod As Integer, cluster As Integer
DataFile = Path
Size = 0
Open DataFile For Input As #1
While Not EOF(1)
Size = Size + 1
ReDim Preserve DataSet(Size)
Input #1, cod, Coord_X, Coord_Y
DataSet(Size).cod = Size
DataSet(Size).Point = cod
DataSet(Size).X = Coord_X
DataSet(Size).Y = Coord_Y
Wend
Close #1
End Sub
Public Function CalcKNearest() As Double
' Get the K nearest neighbors of each points
Dim Knearest(K) As KnearestInf
Dim i As Integer, j As Integer, Pos As Integer, Size As Integer
Dim Dist As Double
Dim DataFile
For i = 1 To UBound(DataSet)
Size = 1
For j = 1 To UBound(DataSet())
Dist = 0
If j <> i Then
Dist = Sqr(((DataSet(i).X - DataSet(j).X) ^ 2) + ((DataSet(i).Y - DataSet(j).Y) ^ 2))
If Size <= K Then
Knearest(Size).Dist = Dist
Knearest(Size).Point = j
Size = Size + 1
Else
Pos = GetMax(Knearest)
If Dist < Knearest(Pos).Dist Then
Knearest(Pos).Dist = Dist
Knearest(Pos).Point = j
End If
End If
End If
Next j
Pos = GetMax(Knearest)
Dist = Sqr(((DataSet(i).X - DataSet(Knearest(Pos).Point).X) ^ 2) + ((DataSet(i).Y - DataSet(Knearest(Pos).Point).Y) ^ 2))
DataSet(i).KNearestDist = Dist
Erase Knearest
Next i
Call SortAuxArray(DataSet)
End Function
Public Sub SetMedianValue()
Dim Indic As Double, AverageDist As Double
Dim AuxArray() As InfArray
'Calcule Median
ReDim AuxArray(UBound(DataSet()))
AuxArray = DataSet
Call SortAuxArray(AuxArray)
If ((UBound(AuxArray())) Mod 2) = 0 Then
Indic = UBound(AuxArray()) / 2
AverageDist = (AuxArray(Indic).KNearestDist + AuxArray(Indic + 1).KNearestDist) / 2
Else
Indic = (UBound(AuxArray()) \ 2) + 1
AverageDist = (AuxArray(Indic).KNearestDist)
End If
End Sub
Public Sub CheckDDRPoints(Eps As Double, MinPts As Integer, RecSet As DAO.Recordset)
' Identify the Directly Density-Reachable points
Dim i As Integer, j As Integer, count As Integer
Dim Dist As Double
Dim Mycollect As New Collection
For i = 1 To UBound(DataSet)
Dist = 0
count = 0
Set Mycollect = Nothing
For j = 1 To UBound(DataSet)
Dist = Sqr(((DataSet(i).X - DataSet(j).X) ^ 2) + ((DataSet(i).Y - DataSet(j).Y) ^ 2))
If Dist <= Eps Then
count = count + 1
Mycollect.Add DataSet(j).Point
End If
Next j
If count >= MinPts Then
DataSet(i).PointType = "Core"
Set DataSet(i).PointsDDR = Mycollect
Else
DataSet(i).PointType = "Border"
End If
DataSet(i).ClusterID = 0
Next i
End Sub
Public Sub FindClusters(ByVal Point As String, Eps As Double, MinPts As Integer)
' Start to classify the points into clusters
Dim i As Integer
For i = 1 To UBound(DataSet())
Call GetDRPoints(Point, Eps, MinPts)
Next i
End Sub
Public Sub GetDRPoints(ByVal PointPos As Integer, Eps As Double, MinPts As Integer)
' Procedure that identify the Density-Reachable points
Dim MySet As New Collection
Dim i As Integer, j As Integer, K As Integer, l As Integer
Dim Found As Boolean
Dim Member As Variant
Dim NewCluster As New Collection
For i = 1 To DataSet(PointPos).PointsDDR.count
NewCluster.Add DataSet(PointPos).PointsDDR.Item(i)
If DataSet(PointPos).Point <> DataSet(PointPos).PointsDDR.Item(i) Then
MySet.Add DataSet(PointPos).PointsDDR.Item(i)
End If
Next i
For Each Member In MySet
For j = 1 To UBound(DataSet())
If DataSet(j).Point = Member Then
For K = 1 To DataSet(j).PointsDDR.count
Found = False
If DataSet(j).PointsDDR.Item(K) <> DataSet(PointPos).Point Then
For l = 1 To MySet.count
If DataSet(j).PointsDDR.Item(K) = MySet.Item(l) Then
Found = True
Exit For
End If
Next l
If Found = True Then
GoTo a:
Else
MySet.Add DataSet(j).PointsDDR.Item(K)
End If
End If
a: Next K
Exit For
End If
Next j
Next
DataSet(PointPos).ClusterID = ClusterID
For i = 1 To MySet.count
For j = 1 To UBound(DataSet())
If DataSet(j).Point = MySet.Item(i) Then
DataSet(j).ClusterID = ClusterID
End If
Next j
Next i
End Sub
Public Sub VerifyClusters(Eps As Double, AverageDist As Double)
' Procedure that verify the composition of the clusters
Dim i As Integer, j As Integer, Size As Integer
Dim Dist As Double
Dim AuxCol As New Collection
Dim Member, Member1, Member2, Member3
Dim AuxArray() As ClusterArrayInf
Dim Exist As Boolean
Exist = False
Size = 1
ReDim Preserve AuxArray(0 To Size)
For i = 1 To UBound(ClusterArray())
Set AuxCol = Nothing
If ClusterArray(i).State = 0 Then
For Each Member In ClusterArray(i).Clusters
For j = 1 To UBound(ClusterArray())
If j <> i Then
For Each Member1 In ClusterArray(j).Clusters
If ClusterArray(j).State = 0 Then
Dist = Sqr(((DataSet(Member).X - DataSet(Member1).X) ^ 2) + ((DataSet(Member).Y - DataSet(Member1).Y) ^ 2))
If Dist < Eps Then
AuxCol.Add j
ClusterArray(j).State = 1
Exit For
End If
End If
Next
End If
Next j
Next
If AuxCol.count <> 0 Then
For Each Member In ClusterArray(i).Clusters
AuxArray(Size - 1).Clusters.Add Member
Next
For Each Member2 In AuxCol
For Each Member3 In ClusterArray(Member2).Clusters
AuxArray(Size - 1).Clusters.Add Member3
Next
Next
Size = Size + 1
Exist = True
Else
ReDim Preserve AuxArray(0 To Size)
For Each Member In ClusterArray(i).Clusters
AuxArray(Size - 1).Clusters.Add Member
Next
Size = Size + 1
End If
ClusterArray(i).State = 1
End If
Next i
If Exist = True Then
AuxArray(0) = ClusterArray(0)
ReDim ClusterArray(UBound(AuxArray()))
ClusterArray = AuxArray
End If
End Sub
Public Sub StoreResults(RecSet As DAO.Recordset)
' Stores clustering results
Dim i As Integer, j As Integer
Dim DataFile, Member
DataFile = Path & "_results.txt"
Open DataFile For Output As #1
For i = 0 To UBound(ClusterArray())
If i = 0 Then
For Each Member In ClusterArray(i).Clusters
Write #1, DataSet(Member).cod, DataSet(Member).Point, "Noise"
Next
Else
For Each Member In ClusterArray(i).Clusters
Write #1, DataSet(Member).cod, DataSet(Member).Point, "Cluster" & i
Next
End If
Next i
Close #1
End Sub
' **** AUXILIARES PROCEDURES ****
Public Sub SortAuxArray(AuxArray() As InfArray)
' Ascendent sorting of AuxArray()
Dim i As Integer, j As Integer
Dim temp() As InfArray
ReDim temp(1)
For i = 1 To UBound(AuxArray()) - 1
For j = i + 1 To UBound(AuxArray)
If (AuxArray(i).KNearestDist) > (AuxArray(j).KNearestDist) Then
'swap places
temp(1) = AuxArray(i)
AuxArray(i) = AuxArray(j)
AuxArray(j) = temp(1)
End If
Next j
Next i
End Sub
Public Function GetMax(Knearest() As KnearestInf) As Integer
' Get the Maximum of Knearest()
Dim i As Integer, Pos As Integer
Dim Max As Double
For i = 1 To UBound(Knearest())
If i = 1 Then
Max = Knearest(i).Dist
Pos = i
Else
If Knearest(i).Dist > Max Then
Max = Knearest(i).Dist
Pos = i
End If
End If
Next i
GetMax = Pos
End Function
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -