⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmean.cpp

📁 自行implement的k-mean(含fuzzy c mean)
💻 CPP
📖 第 1 页 / 共 2 页
字号:

int CKMean::C_Mean(DATA_MATRIX SourceMatrix, int C_Clusters, int *Result_Cluster_Label, DATA_MATRIX &Result_Cluster_Center, int TotalProcessLoop, UINT Distance_Compare_Method)
{
	int i, j, k, z, Process_Loop,
		Process_Final_Loop = TotalProcessLoop;
	MATRIX_BASE_TYPE	Temp, Cluster_Distance_Temp, Cluster_Temp_Counter,
						SortDistanceIndex, SortDistanceGap,
						Jnew, Jold = 0.0,
						Add_Value = 0.00001;//MachineEpsilon();
	BOOL				Update_Continue = TRUE;
	MATRIX_BASE_TYPE	*Cluster_Distance = new MATRIX_BASE_TYPE[C_Clusters];


	DATA_MATRIX		TempMatrix, SortedMatrix, Cluster_Center, Cluster_Scale, Cluster_Center_Check;
	TempMatrix.Data = SortedMatrix.Data = Cluster_Center.Data = Cluster_Scale.Data = Cluster_Center_Check.Data = NULL;
	InitialMatrix(TempMatrix, SourceMatrix.Col, 1, FALSE);
	InitialMatrix(Cluster_Center, C_Clusters, SourceMatrix.Row, TRUE);
	InitialMatrix(Cluster_Scale, SourceMatrix.Col, C_Clusters, FALSE);
	InitialMatrix(Cluster_Center_Check, C_Clusters, SourceMatrix.Row, TRUE);

	if(SourceMatrix.Row <= 1)
	{
		CopyMatrix(TempMatrix, SourceMatrix);
		SortMatrix(SortedMatrix, TempMatrix);
		k = SortedMatrix.Col / C_Clusters;
		DeleteMatrix(SortedMatrix);
	}
	else
	{
		for(i=0;i<SourceMatrix.Col;i++)
		{
			TempMatrix.p_Column[i][0] = 0.0;
			for(j=0;j<SourceMatrix.Row;j++)
				TempMatrix.p_Column[i][0] += (SourceMatrix.p_Column[i][j] * SourceMatrix.p_Column[i][j]);
			TempMatrix.p_Column[i][0] = sqrt(TempMatrix.p_Column[i][0] / SourceMatrix.Row);
		}

		SortMatrix(SortedMatrix, TempMatrix);
		k = SortedMatrix.Col / C_Clusters;

		SortDistanceIndex = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0];
		SortDistanceGap = fabs(TempMatrix.p_Column[(int)(SortedMatrix.p_Column[0][0])][0] - TempMatrix.p_Column[(int)(SortedMatrix.p_Column[SourceMatrix.Col - 1][0])][0]) / (MATRIX_BASE_TYPE)(C_Clusters * 2);
		for(i=1, j=1;i<SortedMatrix.Col && j<C_Clusters;i++)
		{
			Temp = TempMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][0];
			if(fabs(Temp - SortDistanceIndex) >= SortDistanceGap)
			{
				SortDistanceIndex = Temp;
				for(z=0;z<SourceMatrix.Row;z++)
					Cluster_Center.p_Column[j][z] = SourceMatrix.p_Column[(int)(SortedMatrix.p_Column[i][0])][z];
				j++;
			}
		}

		DeleteMatrix(SortedMatrix);
	}

	for(i=0, j=0;i<C_Clusters;i++, j+=k)
		for(z=0;z<SourceMatrix.Row;z++)
			Cluster_Center.p_Column[i][z] = SourceMatrix.p_Column[(int)SortedMatrix.p_Column[j][0]][z];

	for(Process_Loop=0;Process_Loop<TotalProcessLoop && Update_Continue;Process_Loop++)
	{
		// PreProcess Scale
		for(i=0;i<SourceMatrix.Col;i++)
		{
			for(j=0;j<C_Clusters;j++)
				Cluster_Scale.p_Column[i][j] = 0.0;
			Cluster_Distance_Temp = 0.0;

			for(j=0;j<C_Clusters;j++)
			{
				Cluster_Scale.p_Column[i][j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
					Distance_Compare_Method);

				if(Cluster_Scale.p_Column[i][j] < Add_Value)
					Cluster_Scale.p_Column[i][j] = Add_Value;

				Cluster_Scale.p_Column[i][j] = 1.0 / sqrt(Cluster_Scale.p_Column[i][j]);
				Cluster_Distance_Temp += Cluster_Scale.p_Column[i][j];
			}

			for(j=0;j<C_Clusters;j++)
				Cluster_Scale.p_Column[i][j] = Cluster_Scale.p_Column[i][j] / Cluster_Distance_Temp;
		}
/*
		for(i=0;i<SourceMatrix.Col;i++)
		{
			for(j=0;j<C_Clusters;j++)
				Cluster_Scale.p_Column[i][j] = 0.0;
			Cluster_Distance_Temp = 0.0;

			for(j=0;j<C_Clusters;j++)
			{
				for(z=0;z<SourceMatrix.Row;z++)
				{
					Temp = fabs(SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z]);

					if(Temp < Add_Value)
						Temp = Add_Value;

					Cluster_Scale.p_Column[i][j] += (Temp * Temp);
				}

				Cluster_Scale.p_Column[i][j] = 1.0 / sqrt(Cluster_Scale.p_Column[i][j] / SourceMatrix.Row);
				Cluster_Distance_Temp += Cluster_Scale.p_Column[i][j];
			}

			for(j=0;j<C_Clusters;j++)
				Cluster_Scale.p_Column[i][j] = Cluster_Scale.p_Column[i][j] / Cluster_Distance_Temp;
		}
*/
		// Update
		Jnew = 0.0;
		for(j=0;j<C_Clusters;j++) 
		{
			for(k=0;k<SourceMatrix.Row;k++)
				Cluster_Center.p_Column[j][k] = 0.0;

			Cluster_Temp_Counter = 0.0;
			for(i=0;i<SourceMatrix.Col;i++)
			{
				for(k=0;k<SourceMatrix.Row;k++)
					Cluster_Center.p_Column[j][k] += (SourceMatrix.p_Column[i][k] * Cluster_Scale.p_Column[i][j]);
				Cluster_Temp_Counter += Cluster_Scale.p_Column[i][j];
			}

			for(k=0;k<SourceMatrix.Row;k++)
				Cluster_Center.p_Column[j][k] = Cluster_Center.p_Column[j][k] / Cluster_Temp_Counter;

			for(i=0;i<SourceMatrix.Col;i++)
				for(k=0;k<SourceMatrix.Row;k++)
					Jnew += (Cluster_Scale.p_Column[i][j] * (Cluster_Center.p_Column[j][k] - SourceMatrix.p_Column[i][k]) * (Cluster_Center.p_Column[j][k] - SourceMatrix.p_Column[i][k]));
			Jnew /= (SourceMatrix.Row * SourceMatrix.Col * C_Clusters);
		}

		// Process Check
		Temp = 0.0;
		if((Jnew - Jold) < 5)
		{
			Update_Continue = FALSE;
			Process_Final_Loop = Process_Loop + 1;
		}
		else
		{
			Jold = Jnew;
		}
	}

	CopyMatrix(Result_Cluster_Center, Cluster_Center);

	for(i=0;i<SourceMatrix.Col;i++)
	{
		for(j=0;j<C_Clusters;j++)
			Cluster_Distance[j] = 0.0;
			
		for(j=0;j<C_Clusters;j++)
			Cluster_Distance[j] = CompareDistance(SourceMatrix.p_Column[i], Cluster_Center.p_Column[j], SourceMatrix.Row,
			Distance_Compare_Method);
		Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, C_Clusters);
	}

/*	for(i=0;i<SourceMatrix.Col;i++)
	{
		for(j=0;j<C_Clusters;j++)
			Cluster_Distance[j] = 0.0;

		for(j=0;j<C_Clusters;j++)
		{
			for(z=0;z<SourceMatrix.Row;z++)
			{
				Temp = SourceMatrix.p_Column[i][z] - Cluster_Center.p_Column[j][z];
				Cluster_Distance[j] += (Temp * Temp);
			}
//			Cluster_Distance[j] /= SourceMatrix.Row;
		}
		Result_Cluster_Label[i] = VectorFindMin(Cluster_Distance, C_Clusters);
	}
*/
	DeleteMatrix(TempMatrix);
	DeleteMatrix(Cluster_Center);
	DeleteMatrix(Cluster_Scale);
	DeleteMatrix(Cluster_Center_Check);

	delete [] Cluster_Distance;
	Cluster_Distance = NULL;

	return(Process_Final_Loop);
}

BOOL CKMean::InitialMatrix(DATA_MATRIX &SourceMatrix, int Column, int Row, BOOL ZeroMatrix)
{
	if(SourceMatrix.Data)
		DeleteMatrix(SourceMatrix);
	SourceMatrix.Col = Column;
	SourceMatrix.Row = Row;
	SourceMatrix.Data = new MATRIX_BASE_TYPE[Row * Column];
	if(!SourceMatrix.Data)
		return(FALSE);

	SourceMatrix.p_Column = new MATRIX_BASE_TYPE*[Column];

	for(int i=0;i<Column;i++)
		SourceMatrix.p_Column[i] = &SourceMatrix.Data[i * Row];

	if(ZeroMatrix)
		memset(SourceMatrix.Data, 0, sizeof(MATRIX_BASE_TYPE) * Row * Column);

	return(TRUE);
}

void CKMean::DeleteMatrix(DATA_MATRIX &SourceMatrix)
{
	SourceMatrix.Row = 0;
	SourceMatrix.Col = 0;

	if(SourceMatrix.Data) 
	{
		delete [] SourceMatrix.p_Column;
		SourceMatrix.p_Column = NULL;
		delete [] SourceMatrix.Data;
		SourceMatrix.Data = NULL;
	}
}

void CKMean::CopyMatrix(DATA_MATRIX &DestMatrix, DATA_MATRIX FromMatrix)
{
	InitialMatrix(DestMatrix, FromMatrix.Col, FromMatrix.Row);
	memcpy(DestMatrix.Data, FromMatrix.Data, sizeof(MATRIX_BASE_TYPE) * FromMatrix.Row * FromMatrix.Col);
}

void CKMean::SortMatrix(DATA_MATRIX &ResultMatrix, DATA_MATRIX SourceMatrix)
{
	SortMatrix(ResultMatrix, SourceMatrix, SourceMatrix.Col);
}

void CKMean::SortMatrix(DATA_MATRIX &ResultMatrix, DATA_MATRIX SourceMatrix, int Col_Range)
{
	int i, j, k,
		SortIndex;
	MATRIX_BASE_TYPE SortScore;

	DATA_MATRIX TempMatrix;
	TempMatrix.Data = NULL;
	CopyMatrix(TempMatrix, SourceMatrix);

	InitialMatrix(ResultMatrix, SourceMatrix.Col, SourceMatrix.Row, TRUE);

	for(i=0;i<SourceMatrix.Col;i++)
		for(j=0;j<SourceMatrix.Row;j++)
			ResultMatrix.p_Column[i][j] = (MATRIX_BASE_TYPE)i;

	for(j=0;j<SourceMatrix.Row;j++)
		for(i=0;i<Col_Range;i++)
		{
			SortIndex = i;
			SortScore = TempMatrix.p_Column[SortIndex][j];
			for(k=i+1;k<Col_Range;k++)
				if(SortScore > TempMatrix.p_Column[k][j])
				{
					SortScore = TempMatrix.p_Column[k][j];
					SortIndex = k;
				}
			SWAP(TempMatrix.p_Column[i][j], TempMatrix.p_Column[SortIndex][j]);
			SWAP(ResultMatrix.p_Column[i][j], ResultMatrix.p_Column[SortIndex][j]);
		}

	DeleteMatrix(TempMatrix);	
}

int CKMean::VectorFindMin(MATRIX_BASE_TYPE *Vector, int VectorSize)
{
	if(VectorSize <= 1)
		return(0);

	int	i, MinimumLabelID = 0;
	MATRIX_BASE_TYPE MinimumValue = Vector[0];

	for(i=1;i<VectorSize;i++)
		if(MinimumValue > Vector[i])
		{
			MinimumValue = Vector[i];
			MinimumLabelID = i;
		}

	return(MinimumLabelID);
}

MATRIX_BASE_TYPE CKMean::MachineEpsilon()
{
	MATRIX_BASE_TYPE	x = 1.0,
						y,
						z = (MATRIX_BASE_TYPE)(1.0 + x);
	while(z > 1.0)
	{
		y = x;
		x /= 2.0;
		// Avoid double precision!
		z = (MATRIX_BASE_TYPE)(1.0 + (MATRIX_BASE_TYPE)x);
    }

    return((MATRIX_BASE_TYPE)y);
}

void CKMean::DisplayMatrix(DATA_MATRIX MatrixData, CString TitleForDisplay)
{
	if(!MatrixData.Data)
		return;

	int i, j;
	CString Result, Temp;

	if(TitleForDisplay != "")
	{
		Result.Format("Matrix Dimension : %d x %d\n", MatrixData.Col, MatrixData.Row);
		Result = TitleForDisplay + "\n" + Result;
	}
	else
	{
		Result.Format("Matrix Dimension : %d x %d\n", MatrixData.Col, MatrixData.Row);
	}

	for(i=0;i<MatrixData.Col;i++)
	{
		for(j=0;j<MatrixData.Row;j++)
		{
			Temp.Format("%6.4f\t", MatrixData.p_Column[i][j]);
			Result += Temp;
		}

		Temp.Format("\n");
		Result += Temp;
	}
	::AfxMessageBox(Result);
}

MATRIX_BASE_TYPE CKMean::CompareDistance(MATRIX_BASE_TYPE *Compare_1, MATRIX_BASE_TYPE *Compare_2, int Compare_Count, UINT Compare_Method)
{
	// Distance Compare Method
	//	0(Default) : Euclidean Distance
	//	1 :	Cosine-Based Similarity

	int i;
	MATRIX_BASE_TYPE	Temp,
						Distance_Score = 0.0;

	switch(Compare_Method)
	{
	case 1:
		MATRIX_BASE_TYPE CBS_Temp_XiXj, CBS_Temp_Xia, CBS_Temp_Xja;

		CBS_Temp_XiXj = 0.0;
		CBS_Temp_Xia = 0.0;
		CBS_Temp_Xja = 0.0;

		for(i=0;i<Compare_Count;i++)
		{
			CBS_Temp_XiXj += (Compare_1[i] * Compare_2[i]);
			CBS_Temp_Xia += (Compare_1[i] * Compare_1[i]);
			CBS_Temp_Xja += (Compare_2[i] * Compare_2[i]);
		}

		CBS_Temp_Xia = sqrt(CBS_Temp_Xia);
		CBS_Temp_Xja = sqrt(CBS_Temp_Xja);

		Distance_Score = 1.0 - (
			((fabs(CBS_Temp_XiXj) / (CBS_Temp_Xia * CBS_Temp_Xja))) *
			(1 - (fabs(CBS_Temp_Xia - CBS_Temp_Xja) / Max(CBS_Temp_Xia, CBS_Temp_Xja)))
			);
		break;

	case 0:
	default:
		for(i=0;i<Compare_Count;i++)
		{
			Temp = Compare_1[i] - Compare_2[i];
			Distance_Score += (MATRIX_BASE_TYPE)(Temp * Temp);
		}
		Distance_Score /= (MATRIX_BASE_TYPE)Compare_Count;
		break;
	}

	return(Distance_Score);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -