📄 kmean.cpp
字号:
// KMean.cpp: implementation of the CKMean class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <math.h>
#include "KMean.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// Last Modify
// Maker Chen 2008/11/09
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CKMean::CKMean()
{
}
CKMean::~CKMean()
{
}
int CKMean::K_Mean(DATA_MATRIX SourceMatrix, int Process_Matrix_Count, int K_Clusters, int *Result_Cluster_Label, int *Result_Cluster_Distance, DATA_MATRIX &Result_Cluster_Center, int TotalProcessLoop, UINT Distance_Compare_Method)
{
int i, j, k, z, Process_Loop, Cluster_Temp_Counter,
Process_Final_Loop = TotalProcessLoop;
MATRIX_BASE_TYPE Temp, SortDistanceIndex, SortDistanceGap;
BOOL Update_Continue = TRUE;
int *Cluster_Label = new int[Process_Matrix_Count];
MATRIX_BASE_TYPE *Cluster_Distance = new MATRIX_BASE_TYPE[K_Clusters];
DATA_MATRIX TempMatrix, SortedMatrix, Cluster_Center, Cluster_Center_Check;
TempMatrix.Data = SortedMatrix.Data = Cluster_Center.Data = Cluster_Center_Check.Data = NULL;
InitialMatrix(TempMatrix, Process_Matrix_Count, 1, TRUE);
InitialMatrix(Cluster_Center, K_Clusters, SourceMatrix.Row, TRUE);
InitialMatrix(Cluster_Center_Check, K_Clusters, SourceMatrix.Row, TRUE);
if(SourceMatrix.Row <= 1)
{
CopyMatrix(TempMatrix, SourceMatrix);
SortMatrix(SortedMatrix, TempMatrix);
k = SortedMatrix.Col / K_Clusters;
for(i=0, j=0;i<K_Clusters;i++, j+=k)
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[i][z] = SourceMatrix.p_Column[(int)SortedMatrix.p_Column[j][0]][z];
DeleteMatrix(SortedMatrix);
}
else
{
for(i=0;i<Process_Matrix_Count;i++)
{
TempMatrix.p_Column[i][0] = 0.0;
for(j=0;j<SourceMatrix.Row;j++)
TempMatrix.p_Column[i][0] += (SourceMatrix.p_Column[i][j] * SourceMatrix.p_Column[i][j]);
TempMatrix.p_Column[i][0] = sqrt(TempMatrix.p_Column[i][0] / SourceMatrix.Row);
}
SortMatrix(SortedMatrix, TempMatrix);
k = SortedMatrix.Col / K_Clusters;
for(i=0, j=0;i<K_Clusters;i++, j+=k)
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[i][z] = SourceMatrix.p_Column[(int)SortedMatrix.p_Column[j][0]][z];
SortDistanceIndex = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0];
SortDistanceGap = fabs(TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0] - TempMatrix.p_Column[(int)(SortedMatrix.p_Column[SourceMatrix.Col - 1][0])][0]) / (MATRIX_BASE_TYPE)(K_Clusters * 2);
for(i=1, j=1;i<SortedMatrix.Col && j<K_Clusters;i++)
{
Temp = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][0];
if(fabs(Temp - SortDistanceIndex) >= SortDistanceGap)
{
SortDistanceIndex = Temp;
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[j][z] = SourceMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][z];
j++;
}
}
DeleteMatrix(SortedMatrix);
}
for(Process_Loop=0;Process_Loop<TotalProcessLoop && Update_Continue;Process_Loop++)
{
// PreProcess Distance
for(i=0;i<Process_Matrix_Count;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
Distance_Compare_Method);
Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
/* for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
{
for(z=0;z<SourceMatrix.Row;z++)
{
Temp = SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z];
Cluster_Distance[j] += (Temp * Temp);
}
// Cluster_Distance[j] /= SourceMatrix.Row;
}
Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
*/
// Update
for(j=0;j<K_Clusters;j++)
{
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] = 0.0;
Cluster_Temp_Counter = 0;
for(i=0;i<Process_Matrix_Count;i++)
if(Cluster_Label[i] == j)
{
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] += (SourceMatrix.p_Column[i][k] * SourceMatrix.p_Column[i][k]);
Cluster_Temp_Counter++;
}
if(Cluster_Temp_Counter > 0)
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] = sqrt(Cluster_Center.p_Column[j][k] / (MATRIX_BASE_TYPE)Cluster_Temp_Counter);
}
// Process Check
Temp = 0.0;
for(j=0;j<K_Clusters;j++)
for(k=0;k<SourceMatrix.Row;k++)
Temp += fabs(Cluster_Center.p_Column[j][k] - Cluster_Center_Check.p_Column[j][k]);
Temp /= (K_Clusters * SourceMatrix.Row);
if(Temp < 2.0)
{
Update_Continue = FALSE;
Process_Final_Loop = Process_Loop + 1;
}
else
{
CopyMatrix(Cluster_Center_Check, Cluster_Center);
}
}
InitialMatrix(Result_Cluster_Center, Cluster_Center.Col, Cluster_Center.Row + 1, TRUE);
// CopyMatrix(Result_Cluster_Center, Cluster_Center);
for(i=0;i<Cluster_Center.Col;i++)
{
for(j=0;j<Cluster_Center.Row;j++)
Result_Cluster_Center.p_Column[i][j] = Cluster_Center.p_Column[i][j];
Result_Cluster_Center.p_Column[i][Cluster_Center.Row] = 0;
}
for(i=0;i<Process_Matrix_Count;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
Distance_Compare_Method);
Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
Result_Cluster_Distance[i] = (int)(sqrt(Cluster_Distance[Result_Cluster_Label[i]] * SourceMatrix.Row) + 0.5);
Result_Cluster_Center.p_Column[Result_Cluster_Label[i]][Cluster_Center.Row]++;
}
/* for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
{
for(z=0;z<SourceMatrix.Row;z++)
{
Temp = SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z];
Cluster_Distance[j] += (Temp * Temp);
}
// Cluster_Distance[j] /= SourceMatrix.Row;
}
Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
*/
DeleteMatrix(TempMatrix);
DeleteMatrix(Cluster_Center);
DeleteMatrix(Cluster_Center_Check);
delete [] Cluster_Label;
Cluster_Label = NULL;
delete [] Cluster_Distance;
Cluster_Distance = NULL;
return(Process_Final_Loop);
}
int CKMean::K_Mean(DATA_MATRIX SourceMatrix, int K_Clusters, int *Result_Cluster_Label, DATA_MATRIX &Result_Cluster_Center, int TotalProcessLoop, UINT Distance_Compare_Method)
{
int i, j, k, z,
Process_Loop, Cluster_Temp_Counter,
Process_Final_Loop = TotalProcessLoop;
MATRIX_BASE_TYPE Temp, SortDistanceIndex, SortDistanceGap;
BOOL Update_Continue = TRUE;
int *Cluster_Label = new int[SourceMatrix.Col];
MATRIX_BASE_TYPE *Cluster_Distance = new MATRIX_BASE_TYPE[K_Clusters];
DATA_MATRIX TempMatrix, SortedMatrix, Cluster_Center, Cluster_Center_Check;
TempMatrix.Data = SortedMatrix.Data = Cluster_Center.Data = Cluster_Center_Check.Data = NULL;
InitialMatrix(TempMatrix, SourceMatrix.Col, 1, TRUE);
InitialMatrix(Cluster_Center, K_Clusters, SourceMatrix.Row, TRUE);
InitialMatrix(Cluster_Center_Check, K_Clusters, SourceMatrix.Row, TRUE);
if(SourceMatrix.Row <= 1)
{
CopyMatrix(TempMatrix, SourceMatrix);
SortMatrix(SortedMatrix, TempMatrix);
k = SortedMatrix.Col / K_Clusters;
for(i=0, j=0;i<K_Clusters;i++, j+=k)
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[i][z] = SourceMatrix.p_Column[(int)(SortedMatrix.p_Column[j][0])][z];
DeleteMatrix(SortedMatrix);
}
else
{
for(i=0;i<SourceMatrix.Col;i++)
{
TempMatrix.p_Column[i][0] = 0.0;
for(j=0;j<SourceMatrix.Row;j++)
{
TempMatrix.p_Column[i][0] += (SourceMatrix.p_Column[i][j] * SourceMatrix.p_Column[i][j]);
}
TempMatrix.p_Column[i][0] = sqrt(TempMatrix.p_Column[i][0] / (MATRIX_BASE_TYPE)SourceMatrix.Row);
}
SortMatrix(SortedMatrix, TempMatrix);
k = SortedMatrix.Col / K_Clusters;
for(i=0, j=0;i<K_Clusters;i++, j+=k)
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[i][z] = SourceMatrix.p_Column[(int)(SortedMatrix.p_Column[j][0])][z];
SortDistanceIndex = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0];
SortDistanceGap = fabs(TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0] - TempMatrix.p_Column[(int)(SortedMatrix.p_Column[SourceMatrix.Col - 1][0])][0]) / (MATRIX_BASE_TYPE)(K_Clusters * 2);
for(i=1, j=1;i<SortedMatrix.Col && j<K_Clusters;i++)
{
Temp = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][0];
if(fabs(Temp - SortDistanceIndex) >= SortDistanceGap)
{
SortDistanceIndex = Temp;
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[j][z] = SourceMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][z];
j++;
}
}
DeleteMatrix(SortedMatrix);
}
for(Process_Loop=0;(Process_Loop<TotalProcessLoop && Update_Continue);Process_Loop++)
{
// PreProcess Distance
for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
Distance_Compare_Method);
Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
/* for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
{
for(z=0;z<SourceMatrix.Row;z++)
{
Temp = SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z];
Cluster_Distance[j] += (Temp * Temp);
}
// Cluster_Distance[j] /= SourceMatrix.Row;
}
Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
*/
// Update
for(j=0;j<K_Clusters;j++)
{
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] = 0.0;
Cluster_Temp_Counter = 0;
for(i=0;i<SourceMatrix.Col;i++)
if(Cluster_Label[i] == j)
{
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] += (SourceMatrix.p_Column[i][k] * SourceMatrix.p_Column[i][k]);
Cluster_Temp_Counter++;
}
if(Cluster_Temp_Counter > 0)
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] = sqrt(Cluster_Center.p_Column[j][k] / (MATRIX_BASE_TYPE)Cluster_Temp_Counter);
}
// Process Check
Temp = 0.0;
for(j=0;j<K_Clusters;j++)
for(k=0;k<SourceMatrix.Row;k++)
Temp += fabs(Cluster_Center.p_Column[j][k] - Cluster_Center_Check.p_Column[j][k]);
Temp /= (K_Clusters * SourceMatrix.Row);
if(Temp < 2.0)
{
Update_Continue = FALSE;
Process_Final_Loop = Process_Loop + 1;
}
else
{
CopyMatrix(Cluster_Center_Check, Cluster_Center);
}
}
CopyMatrix(Result_Cluster_Center, Cluster_Center);
for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
Distance_Compare_Method);
Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
/* for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<K_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<K_Clusters;j++)
{
for(z=0;z<SourceMatrix.Row;z++)
{
Temp = SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z];
Cluster_Distance[j] += (Temp * Temp);
}
// Cluster_Distance[j] /= SourceMatrix.Row;
}
Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, K_Clusters);
}
*/
DeleteMatrix(TempMatrix);
DeleteMatrix(Cluster_Center);
DeleteMatrix(Cluster_Center_Check);
delete [] Cluster_Label;
Cluster_Label = NULL;
delete [] Cluster_Distance;
Cluster_Distance = NULL;
return(Process_Final_Loop);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -