⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.cpp

📁 实现了K均值算法
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	for (i=0;i<clusterNum;i++)
	{
		if (flag==FLAG_USER)
		{
			if (fabs(temp-0)>0.0000001)
			{
				PU[i]=(float)((float)tempNum[i])/temp;
			}
			
		}
		else if (flag==FLAG_MOVIE)
		{
			if (fabs(temp-0)>0.0000001)
			{
				PI[i]=(float)((float)tempNum[i])/temp;
			}
			
		}
	}


	for (i=1;i<totalNum;i++)
	{
		ZeroMemory(tempData,sizeof(tempData));
		temp =0;
		for (j=1;j<clusterNum+1;j++)
		{
			if (COS == op)
			{
				tempData[j]=Simility(test[i],center[j-1]);
			}
			else if (POS == op)
			{
				tempData[j]=SimilityPerson(test[i],center[j-1]);
			}
			totalData[j]+=tempData[j];
			temp+=tempData[j];
		}
		for (j=1;j<clusterNum+1;j++)
		{
			if (FLAG_USER == flag)
			{
				if (fabs(temp-0)>0.00000001)
				{
					Puser[i].PU[j-1]=tempData[j]/temp;
				}
				
			}
			else if (FLAG_MOVIE == flag)
			{
				if (fabs(temp-0)>0.00000001)
				{
					Pmovie[i].PI[j-1]=tempData[j]/temp;
				}
				
			//	cout<<Pmovie[i].PI[j-1]<<endl;
			}
		}
	}
	temp=0;

	//计算各个类别所占的概率
	//按照相似度相加,然后求所占的比率得到的结果是不行的,因为是平均的。

/*	for (i=1;i<clusterNum+1;i++)
	{
		temp+=totalData[i];
	}
	if (FLAG_USER == flag)
	{
		for (j=1;j<clusterNum+1;j++)
		{
			PU[j-1]=totalData[j]/temp;
		}		
	}
	else if (FLAG_MOVIE == flag)
	{
		for (j=1;j<clusterNum+1;j++)
		{
			PI[j-1]=totalData[j]/temp;
		}		
	}*/


	delete []totalData;
	delete []tempData;


	
	return TRUE;
}
/*
 * Parse one tab-separated record "user<TAB>item<TAB>rate[<TAB>...]" from buf
 * and store the three leading integer fields in *user, *item and *rate.
 *
 * The user and item fields must each be terminated by a tab.  The rate field
 * may be terminated by a tab, a line ending or the end of the string, so
 * records without a fourth column are accepted as well.  Every field is
 * converted with atoi(), so a field that does not start with a number yields 0.
 *
 * Returns 1 on success.  Returns 0 when buf is NULL, a separator is missing or
 * a field does not fit the 20-byte conversion buffer; in that case all three
 * outputs are left at 0.
 */
int Buf_UIR(char* buf,int *user, int *item, int *rate)
{
	char field[20];
	int value[3] = {0, 0, 0};
	const char *cursor = buf;
	int k;

	*user = 0;
	*item = 0;
	*rate = 0;
	if (buf == NULL)
	{
		return 0;
	}

	for (k = 0; k < 3; k++)
	{
		/* Only the last field may end at a line break or at the end of the string. */
		size_t len = strcspn(cursor, (k < 2) ? "\t" : "\t\r\n");

		if (k < 2 && cursor[len] != '\t')
		{
			return 0;	/* separator missing */
		}
		if (len >= sizeof(field))
		{
			return 0;	/* would overflow the conversion buffer */
		}
		memcpy(field, cursor, len);
		field[len] = '\0';
		value[k] = atoi(field);

		if (k < 2)
		{
			cursor += len + 1;	/* step over the tab */
		}
	}

	*user = value[0];
	*item = value[1];
	*rate = value[2];
	return 1;
}
/*
 * Similarity between the rating vector Ua and the centre vector Ub.
 *
 * Indices 1 .. itemsum-1 are examined; an entry takes part only when it is
 * greater than 0.  Each vector's mean is taken over its own positive entries,
 * then the deviations from those means are combined over the positions where
 * both vectors are positive.  The absolute value of every cross term is
 * accumulated, so the result is never negative.
 *
 * Returns 0 when either sum of squared deviations is not positive (this
 * includes the case of no common positions).
 */
float Simility(long* Ua, float*Ub)
{
	double sumUa = 0.0;
	double sumUb = 0.0;
	int countUa = 0;
	int countUb = 0;
	float aveUa = 0.0f;
	float aveUb = 0.0f;
	float SumRelativeA = 0.0f, SumRelativeB = 0.0f, SumRelativeAB = 0.0f;
	int i;

	/* First pass: per-vector means over the positive entries. */
	for (i=1;i<itemsum;i++)
	{
		if (Ua[i]>0)
		{
			sumUa += Ua[i];
			countUa++;
		}
		if (Ub[i]>0)
		{
			sumUb += Ub[i];
			countUb++;
		}
	}
	/* Floating-point division: the integer form used before truncated the
	 * means, and the float centre values were truncated while being summed. */
	if (countUa!=0)
	{
		aveUa = (float)(sumUa/countUa);
	}
	if (countUb!=0)
	{
		aveUb = (float)(sumUb/countUb);
	}

	/* Second pass: accumulate over the common positions.  Visiting them in
	 * index order again replaces the itemsum-sized index array. */
	for (i=1;i<itemsum;i++)
	{
		if (Ua[i]>0&&Ub[i]>0)
		{
			float devA = Ua[i] - aveUa;
			float devB = Ub[i] - aveUb;

			SumRelativeA += devA*devA;
			SumRelativeB += devB*devB;
			SumRelativeAB += (float)fabs(devA*devB);
		}
	}
	if (SumRelativeA<=0||SumRelativeB<=0)
	{
		return 0;
	}

	return (float)(SumRelativeAB/(sqrt(SumRelativeA)*sqrt(SumRelativeB)));
}

/*
 * Similarity between the rating vector Ua and the centre vector Ub, computed
 * as the absolute value of the Pearson correlation coefficient over the
 * positions (indices 1 .. itemsum-1) where both vectors are greater than 0.
 *
 * Returns 0 when there is no common position or when either vector has no
 * variance over the common positions; the previous code divided by zero in
 * the first case and produced NaN/inf in the second.
 */
float SimilityPerson(long* Ua, float*Ub)
{
	int comItem = 0;		/* number of positions rated in both vectors */
	double sumUa = 0.0;
	double sumUb = 0.0;
	double sumUaUb = 0.0;	/* sum of (Ua*Ub) */
	double sumUaUa = 0.0;	/* sum of (Ua*Ua) */
	double sumUbUb = 0.0;	/* sum of (Ub*Ub) */
	double numeratorPerson;
	double denominatorPersonUa;
	double denominatorPersonUb;
	int i;

	/* Sums are kept in double: Ub holds fractional centre values that the
	 * former int accumulators truncated on every addition. */
	for (i=1;i<itemsum;i++)
	{
		if (Ua[i]>0&&Ub[i]>0)
		{
			double a = (double)Ua[i];
			double b = (double)Ub[i];

			sumUa += a;
			sumUb += b;
			sumUaUb += a*b;
			sumUaUa += a*a;
			sumUbUb += b*b;
			comItem++;
		}
	}

	if (comItem == 0)
	{
		return 0;
	}

	numeratorPerson = sumUaUb - sumUa*sumUb/comItem;
	denominatorPersonUa = sumUaUa - sumUa*sumUa/comItem;
	denominatorPersonUb = sumUbUb - sumUb*sumUb/comItem;

	/* Same guard as Simility(): no variance means no usable correlation. */
	if (denominatorPersonUa<=0||denominatorPersonUb<=0)
	{
		return 0;
	}

	return (float)fabs(numeratorPerson/sqrt(denominatorPersonUa*denominatorPersonUb));
}



/*
 * Re-estimate the cluster centres trainTimes times.
 *
 * flag selects the data set: FLAG_USER clusters the rows of trainuser into
 * cluster[], FLAG_MOVIE clusters the rows of movie into movieCluster[].
 * In every round each row (starting at index 1) is assigned to the centre with
 * the highest Simility() score; afterwards every centre component becomes the
 * mean of the non-zero values assigned to it, or 0 when there were none.
 *
 * Returns FALSE in all cases, as the original implementation did.  An
 * unrecognised flag now returns before any centre is touched instead of
 * running with uninitialised loop bounds.
 *
 * NOTE(review): ZeroMemory takes a length in bytes, but the calls below pass
 * what look like element counts -- confirm against the declarations of
 * tempCluster, tempData, tempNum and tempNum1.
 */
BOOL UpdateClusterCenter(int flag,int trainTimes)
{
	int i,j,k,n;
	int clusterNum,itemNum,totalNum;
	long **samples;		/* rows being clustered */
	float **centers;	/* centres being updated */
	int cont=0;

	ZeroMemory(tempCluster,ITEM_NUM*ITEM_NUM);
	ZeroMemory(tempData,MAX_CLUSTER_NUM);
	ZeroMemory(tempNum,MAX_CLUSTER_NUM);

	if (FLAG_USER==flag)
	{
		clusterNum = CLUSTER_NUM;
		itemNum = ITEM_NUM;
		totalNum = TRAIN_USER_NUM;
		samples = trainuser;
		centers = cluster;
	}
	else if (FLAG_MOVIE == flag)
	{
		clusterNum = MOVIE_CLUSTER_NUM;
		itemNum = TRAIN_USER_NUM;
		totalNum = ITEM_NUM;
		samples = movie;
		centers = movieCluster;
	}
	else
	{
		/* Unknown data set: the bounds below would be uninitialised. */
		return FALSE;
	}

	while (cont++<trainTimes)
	{
		ZeroMemory(tempCluster,ITEM_NUM*ITEM_NUM);
		ZeroMemory(tempData,MAX_CLUSTER_NUM);
		ZeroMemory(tempNum,MAX_CLUSTER_NUM);
		ZeroMemory(tempNum1,MAX_CLUSTER_NUM*ITEM_NUM);

		/* Assignment step: add every row to its most similar centre. */
		for (i=1;i<totalNum;i++)
		{
			for (j=0;j<clusterNum;j++)
			{
				tempData[j]=Simility(samples[i],centers[j]);
			}
			/* maxValue expects the number of centres of the selected data set. */
			k=maxValue(tempData,clusterNum);

			for (n=0;n<itemNum;n++)
			{
				tempCluster[k][n]+=samples[i][n];
				if (samples[i][n]!=0)
				{
					/* only non-zero entries count towards the mean */
					tempNum1[k][n]++;
				}
			}
			tempNum[k]++;
		}

		/* Update step: component-wise mean of the non-zero contributions. */
		for (j=0;j<clusterNum;j++)
		{
			for (n=0;n<itemNum;n++)
			{
				if (tempNum1[j][n]!=0)
				{
					tempCluster[j][n]/=tempNum1[j][n];
				}
				else
				{
					tempCluster[j][n]=0;
				}
				centers[j][n]=tempCluster[j][n];
			}
		}
	}

	return FALSE;
}

/*
 * Load the rating file at path into the global trainuser and movie matrices.
 *
 * Both matrices are cleared first.  Every line is then parsed with Buf_UIR();
 * a record whose user number lies in [0, TRAIN_USER_NUM) and whose item number
 * lies in [0, itemsum) is stored as trainuser[user][item] and
 * movie[item][user].  Other records are ignored.
 *
 * Returns FALSE (after printing a message) when the file cannot be opened,
 * TRUE otherwise.
 */
BOOL readFile(CString path)
{
	char tmpbuf[100];		/* one line of the rating file */
	int usernum = 0;
	int itemnum = 0;
	int rate = 0;
	int i,j;
	FILE * fp;

	fp=fopen(path,"r");
	if (!fp)
	{
		cout<<"open basefile failed";
		return FALSE;
	}

	/* Clear the matrices row by row.  trainuser and movie are pointers to row
	 * pointers, so the former memset/ZeroMemory with sizeof(pointer) wiped the
	 * first row pointer instead of the data; row 0 is now cleared like the
	 * others. */
	for (i=0;i<TRAIN_USER_NUM;i++)
	{
		for (j=0;j<ITEM_NUM;j++)
		{
			trainuser[i][j]=0;
		}
	}
	for (i=0;i<ITEM_NUM;i++)
	{
		for (j=0;j<TRAIN_USER_NUM;j++)
		{
			movie[i][j]=0;
		}
	}

	/* Loop on fgets itself: testing feof() before reading processes a stale
	 * buffer once more after the last line. */
	while (fgets(tmpbuf,sizeof(tmpbuf),fp) != NULL)
	{
		if (!Buf_UIR(tmpbuf,&usernum,&itemnum,&rate))
		{
			continue;
		}
		if (usernum>=0&&itemnum>=0&&usernum<TRAIN_USER_NUM&&itemnum<itemsum)
		{
			trainuser[usernum][itemnum] = rate;
			movie[itemnum][usernum] = rate;
		}
	}
	fclose(fp);

	/* Sanity output: print any negative rating found among the first rows. */
	for (j =1;j<CLUSTER_NUM;j++)
	{
		for (i=1;i<ITEM_NUM-1;i++)
		{
			if (trainuser[j][i]<0)
			{
				cout<<trainuser[j][i]<<endl;
			}
		}
	}
	return TRUE;
}



int maxValue(float data[],int length)
{
	int i=0,j;
	float max=-100;
	for (i=0;i<length;i++)
	{
		if (data[i]>max)
		{
			max=data[i];
			j=i;
		}
	
	}
	return j;
}

/*
 * Seed the cluster centres from the loaded data.
 *
 * User centre r takes columns 1 .. ITEM_NUM-1 of trainuser[r+1]; a diagnostic
 * "aa" line is printed for every negative value copied.  Movie centre r takes
 * columns 1 .. TEST_USER_NUM-1 of movie[r+1].  Column 0 of the centres is not
 * written.
 */
void initialCluster()
{
	int row;
	int col;

	/* user clusters */
	for (row = 0; row < CLUSTER_NUM; ++row)
	{
		col = 1;
		while (col < ITEM_NUM)
		{
			cluster[row][col] = trainuser[row + 1][col];
			if (cluster[row][col] < 0)
			{
				cout<<"aa"<<endl;
			}
			++col;
		}
	}

	/* movie clusters */
	for (row = 0; row < MOVIE_CLUSTER_NUM; ++row)
	{
		col = 1;
		while (col < TEST_USER_NUM)
		{
			movieCluster[row][col] = movie[row + 1][col];
			++col;
		}
	}
}

void loadMemory()
{
	int i;
	trainuser = (long **)malloc(sizeof(long *)*TEST_USER_NUM);
	for (i=0;i<TEST_USER_NUM;i++)
	{
		trainuser[i]=(long *)malloc(sizeof(long)*ITEM_NUM);
	}
	movie = (long **)malloc(sizeof(long *)*ITEM_NUM);
	for (i=0;i<ITEM_NUM;i++)
	{
		movie[i]=(long*)malloc(sizeof(long)*TEST_USER_NUM);
	}
	cluster = (float**)malloc(sizeof(float*)*CLUSTER_NUM);
	for (i=0;i<CLUSTER_NUM;i++)
	{
		cluster[i]=(float*)malloc(sizeof(float)*ITEM_NUM);
	}
	movieCluster = (float**)malloc(sizeof(float*)*MOVIE_CLUSTER_NUM);
	for (i=0;i<MOVIE_CLUSTER_NUM;i++)
	{
		movieCluster[i]=(float*)malloc(sizeof(float)*TEST_USER_NUM);
	}

}
void freeMemory()
{
	/*
	malloc分配的内存空间,如果越界,free时就会产生这样的错误。DAMAGE:after Normal block(#41) at 0x009207A8 */
	int i;
	i=0;
	free(trainuser);
	free(movie);
	free(cluster);
	free(movieCluster);
	/*for (i=0;i<TEST_USER_NUM;i++)
	{
		free(trainuser[i]);
	}
	free(trainuser);

	for (i=0;i<ITEM_NUM;i++)
	{
		free(movie[i]);
	}
	free(movie);

	for (i=0;i<CLUSTER_NUM;i++)
	{
		free(cluster[i]);
	}
	free(cluster);

	for (i=0;i<MOVIE_CLUSTER_NUM;i++)
	{
		free(movieCluster[i]);
	}
	free(movieCluster);*/
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -