📄 kmean.cpp
字号:
int CKMean::C_Mean(DATA_MATRIX SourceMatrix, int C_Clusters, int *Result_Cluster_Label, DATA_MATRIX &Result_Cluster_Center, int TotalProcessLoop, UINT Distance_Compare_Method)
{
int i, j, k, z, Process_Loop,
Process_Final_Loop = TotalProcessLoop;
MATRIX_BASE_TYPE Temp, Cluster_Distance_Temp, Cluster_Temp_Counter,
SortDistanceIndex, SortDistanceGap,
Jnew, Jold = 0.0,
Add_Value = 0.00001;//MachineEpsilon();
BOOL Update_Continue = TRUE;
MATRIX_BASE_TYPE *Cluster_Distance = new MATRIX_BASE_TYPE[C_Clusters];
DATA_MATRIX TempMatrix, SortedMatrix, Cluster_Center, Cluster_Scale, Cluster_Center_Check;
TempMatrix.Data = SortedMatrix.Data = Cluster_Center.Data = Cluster_Scale.Data = Cluster_Center_Check.Data = NULL;
InitialMatrix(TempMatrix, SourceMatrix.Col, 1, FALSE);
InitialMatrix(Cluster_Center, C_Clusters, SourceMatrix.Row, TRUE);
InitialMatrix(Cluster_Scale, SourceMatrix.Col, C_Clusters, FALSE);
InitialMatrix(Cluster_Center_Check, C_Clusters, SourceMatrix.Row, TRUE);
if(SourceMatrix.Row <= 1)
{
CopyMatrix(TempMatrix, SourceMatrix);
SortMatrix(SortedMatrix, TempMatrix);
k = SortedMatrix.Col / C_Clusters;
DeleteMatrix(SortedMatrix);
}
else
{
for(i=0;i<SourceMatrix.Col;i++)
{
TempMatrix.p_Column[i][0] = 0.0;
for(j=0;j<SourceMatrix.Row;j++)
TempMatrix.p_Column[i][0] += (SourceMatrix.p_Column[i][j] * SourceMatrix.p_Column[i][j]);
TempMatrix.p_Column[i][0] = sqrt(TempMatrix.p_Column[i][0] / SourceMatrix.Row);
}
SortMatrix(SortedMatrix, TempMatrix);
k = SortedMatrix.Col / C_Clusters;
SortDistanceIndex = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0];
SortDistanceGap = fabs(TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0] - TempMatrix.p_Column[(int)(SortedMatrix.p_Column[SourceMatrix.Col - 1][0])][0]) / (MATRIX_BASE_TYPE)(C_Clusters * 2);
for(i=1, j=1;i<SortedMatrix.Col && j<C_Clusters;i++)
{
Temp = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][0];
if(fabs(Temp - SortDistanceIndex) >= SortDistanceGap)
{
SortDistanceIndex = Temp;
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[j][z] = SourceMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][z];
j++;
}
}
DeleteMatrix(SortedMatrix);
}
for(i=0, j=0;i<C_Clusters;i++, j+=k)
for(z=0;z<SourceMatrix.Row;z++)
Cluster_Center.p_Column[i][z] = SourceMatrix.p_Column[(int)SortedMatrix.p_Column[j][0]][z];
for(Process_Loop=0;Process_Loop<TotalProcessLoop && Update_Continue;Process_Loop++)
{
// PreProcess Scale
for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<C_Clusters;j++)
Cluster_Scale.p_Column[i][j] = 0.0;
Cluster_Distance_Temp = 0.0;
for(j=0;j<C_Clusters;j++)
{
Cluster_Scale.p_Column[i][j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
Distance_Compare_Method);
if(Cluster_Scale.p_Column[i][j] < Add_Value)
Cluster_Scale.p_Column[i][j] = Add_Value;
Cluster_Scale.p_Column[i][j] = 1.0 / sqrt(Cluster_Scale.p_Column[i][j]);
Cluster_Distance_Temp += Cluster_Scale.p_Column[i][j];
}
for(j=0;j<C_Clusters;j++)
Cluster_Scale.p_Column[i][j] = Cluster_Scale.p_Column[i][j] / Cluster_Distance_Temp;
}
/*
for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<C_Clusters;j++)
Cluster_Scale.p_Column[i][j] = 0.0;
Cluster_Distance_Temp = 0.0;
for(j=0;j<C_Clusters;j++)
{
for(z=0;z<SourceMatrix.Row;z++)
{
Temp = fabs(SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z]);
if(Temp < Add_Value)
Temp = Add_Value;
Cluster_Scale.p_Column[i][j] += (Temp * Temp);
}
Cluster_Scale.p_Column[i][j] = 1.0 / sqrt(Cluster_Scale.p_Column[i][j] / SourceMatrix.Row);
Cluster_Distance_Temp += Cluster_Scale.p_Column[i][j];
}
for(j=0;j<C_Clusters;j++)
Cluster_Scale.p_Column[i][j] = Cluster_Scale.p_Column[i][j] / Cluster_Distance_Temp;
}
*/
// Update
Jnew = 0.0;
for(j=0;j<C_Clusters;j++)
{
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] = 0.0;
Cluster_Temp_Counter = 0.0;
for(i=0;i<SourceMatrix.Col;i++)
{
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] += (SourceMatrix.p_Column[i][k] * Cluster_Scale.p_Column[i][j]);
Cluster_Temp_Counter += Cluster_Scale.p_Column[i][j];
}
for(k=0;k<SourceMatrix.Row;k++)
Cluster_Center.p_Column[j][k] = Cluster_Center.p_Column[j][k] / Cluster_Temp_Counter;
for(i=0;i<SourceMatrix.Col;i++)
for(k=0;k<SourceMatrix.Row;k++)
Jnew += (Cluster_Scale.p_Column[i][j] * (Cluster_Center.p_Column[j][k] - SourceMatrix.p_Column[i][k]) * (Cluster_Center.p_Column[j][k] - SourceMatrix.p_Column[i][k]));
Jnew /= (SourceMatrix.Row * SourceMatrix.Col * C_Clusters);
}
// Process Check
Temp = 0.0;
if((Jnew - Jold) < 5)
{
Update_Continue = FALSE;
Process_Final_Loop = Process_Loop + 1;
}
else
{
Jold = Jnew;
}
}
CopyMatrix(Result_Cluster_Center, Cluster_Center);
for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<C_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<C_Clusters;j++)
Cluster_Distance[j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
Distance_Compare_Method);
Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, C_Clusters);
}
/* for(i=0;i<SourceMatrix.Col;i++)
{
for(j=0;j<C_Clusters;j++)
Cluster_Distance[j] = 0.0;
for(j=0;j<C_Clusters;j++)
{
for(z=0;z<SourceMatrix.Row;z++)
{
Temp = SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z];
Cluster_Distance[j] += (Temp * Temp);
}
// Cluster_Distance[j] /= SourceMatrix.Row;
}
Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, C_Clusters);
}
*/
DeleteMatrix(TempMatrix);
DeleteMatrix(Cluster_Center);
DeleteMatrix(Cluster_Scale);
DeleteMatrix(Cluster_Center_Check);
delete [] Cluster_Distance;
Cluster_Distance = NULL;
return(Process_Final_Loop);
}
BOOL CKMean::InitialMatrix(DATA_MATRIX &SourceMatrix, int Column, int Row, BOOL ZeroMatrix)
{
if(SourceMatrix.Data)
DeleteMatrix(SourceMatrix);
SourceMatrix.Col = Column;
SourceMatrix.Row = Row;
SourceMatrix.Data = new MATRIX_BASE_TYPE[Row * Column];
if(!SourceMatrix.Data)
return(FALSE);
SourceMatrix.p_Column = new MATRIX_BASE_TYPE*[Column];
for(int i=0;i<Column;i++)
SourceMatrix.p_Column[i] = &SourceMatrix.Data[i * Row];
if(ZeroMatrix)
memset(SourceMatrix.Data, 0, sizeof(MATRIX_BASE_TYPE) * Row * Column);
return(TRUE);
}
void CKMean::DeleteMatrix(DATA_MATRIX &SourceMatrix)
{
SourceMatrix.Row = 0;
SourceMatrix.Col = 0;
if(SourceMatrix.Data)
{
delete [] SourceMatrix.p_Column;
SourceMatrix.p_Column = NULL;
delete [] SourceMatrix.Data;
SourceMatrix.Data = NULL;
}
}
void CKMean::CopyMatrix(DATA_MATRIX &DestMatrix, DATA_MATRIX FromMatrix)
{
InitialMatrix(DestMatrix, FromMatrix.Col, FromMatrix.Row);
memcpy(DestMatrix.Data, FromMatrix.Data, sizeof(MATRIX_BASE_TYPE) * FromMatrix.Row * FromMatrix.Col);
}
void CKMean::SortMatrix(DATA_MATRIX &ResultMatrix, DATA_MATRIX SourceMatrix)
{
SortMatrix(ResultMatrix, SourceMatrix, SourceMatrix.Col);
}
void CKMean::SortMatrix(DATA_MATRIX &ResultMatrix, DATA_MATRIX SourceMatrix, int Col_Range)
{
int i, j, k,
SortIndex;
MATRIX_BASE_TYPE SortScore;
DATA_MATRIX TempMatrix;
TempMatrix.Data = NULL;
CopyMatrix(TempMatrix, SourceMatrix);
InitialMatrix(ResultMatrix, SourceMatrix.Col, SourceMatrix.Row, TRUE);
for(i=0;i<SourceMatrix.Col;i++)
for(j=0;j<SourceMatrix.Row;j++)
ResultMatrix.p_Column[i][j] = (MATRIX_BASE_TYPE)i;
for(j=0;j<SourceMatrix.Row;j++)
for(i=0;i<Col_Range;i++)
{
SortIndex = i;
SortScore = TempMatrix.p_Column[SortIndex][j];
for(k=i+1;k<Col_Range;k++)
if(SortScore > TempMatrix.p_Column[k][j])
{
SortScore = TempMatrix.p_Column[k][j];
SortIndex = k;
}
SWAP(TempMatrix.p_Column[i][j], TempMatrix.p_Column[SortIndex][j]);
SWAP(ResultMatrix.p_Column[i][j], ResultMatrix.p_Column[SortIndex][j]);
}
DeleteMatrix(TempMatrix);
}
int CKMean::VectorFindMin(MATRIX_BASE_TYPE *Vector, int VectorSize)
{
if(VectorSize <= 1)
return(0);
int i, MinimumLabelID = 0;
MATRIX_BASE_TYPE MinimumValue = Vector[0];
for(i=1;i<VectorSize;i++)
if(MinimumValue > Vector[i])
{
MinimumValue = Vector[i];
MinimumLabelID = i;
}
return(MinimumLabelID);
}
MATRIX_BASE_TYPE CKMean::MachineEpsilon()
{
MATRIX_BASE_TYPE x = 1.0,
y,
z = (MATRIX_BASE_TYPE)(1.0 + x);
while(z > 1.0)
{
y = x;
x /= 2.0;
// Avoid double precision!
z = (MATRIX_BASE_TYPE)(1.0 + (MATRIX_BASE_TYPE)x);
}
return((MATRIX_BASE_TYPE)y);
}
void CKMean::DisplayMatrix(DATA_MATRIX MatrixData, CString TitleForDisplay)
{
if(!MatrixData.Data)
return;
int i, j;
CString Result, Temp;
if(TitleForDisplay != "")
{
Result.Format("Matrix Dimension : %d x %d\n", MatrixData.Col, MatrixData.Row);
Result = TitleForDisplay + "\n" + Result;
}
else
{
Result.Format("Matrix Dimension : %d x %d\n", MatrixData.Col, MatrixData.Row);
}
for(i=0;i<MatrixData.Col;i++)
{
for(j=0;j<MatrixData.Row;j++)
{
Temp.Format("%6.4f\t", MatrixData.p_Column[i][j]);
Result += Temp;
}
Temp.Format("\n");
Result += Temp;
}
::AfxMessageBox(Result);
}
MATRIX_BASE_TYPE CKMean::CompareDistance(MATRIX_BASE_TYPE *Compare_1, MATRIX_BASE_TYPE *Compare_2, int Compare_Count, UINT Compare_Method)
{
// Distance Compare Method
// 0(Default) : Euclidean Distance
// 1 : Cosine-Based Similarity
int i;
MATRIX_BASE_TYPE Temp,
Distance_Score = 0.0;
switch(Compare_Method)
{
case 1:
MATRIX_BASE_TYPE CBS_Temp_XiXj, CBS_Temp_Xia, CBS_Temp_Xja;
CBS_Temp_XiXj = 0.0;
CBS_Temp_Xia = 0.0;
CBS_Temp_Xja = 0.0;
for(i=0;i<Compare_Count;i++)
{
CBS_Temp_XiXj += (Compare_1[i] * Compare_2[i]);
CBS_Temp_Xia += (Compare_1[i] * Compare_1[i]);
CBS_Temp_Xja += (Compare_2[i] * Compare_2[i]);
}
CBS_Temp_Xia = sqrt(CBS_Temp_Xia);
CBS_Temp_Xja = sqrt(CBS_Temp_Xja);
Distance_Score = 1.0 - (
((fabs(CBS_Temp_XiXj) / (CBS_Temp_Xia * CBS_Temp_Xja))) *
(1 - (fabs(CBS_Temp_Xia - CBS_Temp_Xja) / Max(CBS_Temp_Xia, CBS_Temp_Xja)))
);
break;
case 0:
default:
for(i=0;i<Compare_Count;i++)
{
Temp = Compare_1[i] - Compare_2[i];
Distance_Score += (MATRIX_BASE_TYPE)(Temp * Temp);
}
Distance_Score /= (MATRIX_BASE_TYPE)Compare_Count;
break;
}
return(Distance_Score);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -