⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.cpp

📁 实现了K均值算法
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// kmeans.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "math.h"
#include "kmeans.h"
#include <fstream>

using namespace std;
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// The one and only application object

CWinApp theApp;


// Dataset limits and hard-coded experiment file locations.
const int usersum = 900+1;// total number of users; _tmain uses it as an exclusive upper bound on user ids read from the test file (the +1 presumably accounts for 1-based ids - TODO confirm)
const int itemsum =1600+1;// total number of recommendable items; _tmain uses it as an exclusive upper bound on item ids
// Training-set and test-set paths. Both are filled in by _tmain with
// CString::Format for each data split ("u<N>.base" / "u<N>.test").
CString basefile;// = "D:\\experiment\\data\\u3.base";
CString testfile;// = "D:\\experiment\\data\\u3.test";
CString staticPath = "D:\\experiment\\static.txt";// opened by _tmain as the statistics log (fouts)
CString resultsPath = "D:\\experiment\\time.txt";// opened by _tmain as the timing log (foutT)
// Per-cluster output paths; not referenced in the visible part of this file.
CString clusterPath_5 = "D:\\experiment\\cluster_5.base";
CString clusterPath_1 = "D:\\experiment\\cluster_1.base";
CString clusterPath_2 = "D:\\experiment\\cluster_2.base";
CString clusterPath_3 = "D:\\experiment\\cluster_3.base";
CString clusterPath_4 = "D:\\experiment\\cluster_4.base";


// FILE * fp;
// A (similarity, user) pair. Not referenced in the visible part of this file.
struct _simi
{
	float value;// similarity value
	int num;	// number (id) of the similar user
};
// Per-user record with one slot per user cluster (CLUSTER_NUM slots).
// evaluateALL reads PU[k] as this user's weight for user cluster k.
struct detailUserInfo 
{
	float PU[CLUSTER_NUM];	// weight / probability of the user for each user cluster
	int NUM[CLUSTER_NUM];	// presumably a per-cluster counter - TODO confirm against calculate()
};
// Per-item record with one slot per movie cluster (MOVIE_CLUSTER_NUM slots).
// evaluateALL and getUserMovieRelation read PI[l] as this item's weight for
// movie cluster l.
struct detailMovieInfo 
{
	float PI[MOVIE_CLUSTER_NUM];	// weight / probability of the item for each movie cluster
	int NUM[MOVIE_CLUSTER_NUM];	// presumably a per-cluster counter - TODO confirm against calculate()
};
// Earlier fixed-size declarations, kept for reference. They presumably show
// the dimensions the dynamically allocated matrices below are meant to have.
// long trainuser[TRAIN_USER_NUM][ITEM_NUM]={0};	// training-set user x item rating matrix
// long testUser[TEST_USER_NUM][ITEM_NUM]={0};
// long movie[ITEM_NUM][TEST_USER_NUM]={0};
// 
// 
// float cluster[CLUSTER_NUM][ITEM_NUM]={0};
// float movieCluster[MOVIE_CLUSTER_NUM][TEST_USER_NUM]={0};

// Heap-backed matrices (presumably allocated by loadMemory() - its definition
// is not visible here).
long ** trainuser;		// passed to calculate() together with cluster for FLAG_USER
long ** movie;			// passed to calculate() together with movieCluster for FLAG_MOVIE
float **cluster;		// user-cluster centres; row i is read per item by getUserMovieRelation
float ** movieCluster;	// movie-cluster centres

// Written by getUserMovieRelation and read by evaluateALL: one value per
// (user cluster, movie cluster) pair.
float RUI[CLUSTER_NUM][MOVIE_CLUSTER_NUM];

float evaluate[TEST_USER_NUM][ITEM_NUM];// stores the rating of user i for movie j, r(i,j)
float PU[CLUSTER_NUM];// proportion taken up by user class i
float PI[MOVIE_CLUSTER_NUM];// proportion taken up by movie class i

int INUM[MOVIE_CLUSTER_NUM];// number of members of each class, used to compute the probabilities
int UNUM[CLUSTER_NUM];
detailMovieInfo Pmovie[ITEM_NUM];// probability of each item belonging to each class
detailUserInfo  Puser[TEST_USER_NUM];


int totalTrainNum=0;// total number of entries in the test set (per the original comment; the name suggests the training set - TODO confirm)
float recomend[ITEM_NUM];
float MAE = 0;				// mean absolute error accumulated by _tmain over the test file
int recomItemCount = 0; 	// number of test lines that contributed to MAE
CString clusterFilePath;
float tempCluster[ITEM_NUM][ITEM_NUM];// stores the running sums of the cluster currently being processed
float tempData[MAX_CLUSTER_NUM]={0};
int tempNum[MAX_CLUSTER_NUM]={0};// stores the member count of each current cluster
int tempNum1[MAX_CLUSTER_NUM][ITEM_NUM];
// Forward declarations. Except for evaluateALL, initial and
// getUserMovieRelation (and the beginning of calculate) the definitions are
// not visible in this part of the file.

// Parses one text line into user, item and rate; _tmain calls it for every
// line of the test file. Meaning of the return value: not visible here.
int Buf_UIR(char* buf,int *user, int *item, int *rate);

// Similarity measures between a long vector and a float vector (presumably a
// rating row and a cluster centre - TODO confirm).
float Simility(long* Ua, float*Ub);
float SimilityPerson(long* Ua, float*Ub);
void loadMemory();// allocates the dynamic memory
void freeMemory();

int maxValue(float data[],int len);

// Per the comment at its definition: computes the probability of every
// user/item belonging to each class and the proportion of each class.
BOOL calculate(float** center,long **test,int flag,int op);
BOOL UpdateClusterCenter(int flag,int trainTimes);// trains the cluster centres for users or movies (selected by flag) for the given number of training rounds

// Reads data from the file at path. NOTE(review): the original comment also
// described a "test" destination and a flag (1 = user data, otherwise movie
// data) that are no longer part of this signature.
BOOL readFile(CString path);

void initialCluster();
void initial();

void evaluateALL();

void getUserMovieRelation();
void evaluateALL()
{
	float temp;
	int i,j,k,l;
	for (i=1;i<TEST_USER_NUM;i++)
	{
		temp =0;
		for (j=1;j<ITEM_NUM;j++)
		{
			temp =0;
			for (k =0;k<CLUSTER_NUM;k++)
			{
				for (l=0;l<MOVIE_CLUSTER_NUM;l++)
				{
					temp+=Puser[i].PU[k]*Pmovie[j].PI[l]*RUI[k][l];//*PU[k]*PI[l];
				}
			}
			evaluate[i][j]=temp;
		//	cout<<temp<<"  ";
		}
	//	cout<<endl;
	}

	return;
}
void initial()
{
	int i,j;
	for (i=0;i<TEST_USER_NUM;i++)
	{
		for (j=0;j<CLUSTER_NUM;j++)
		{
			Puser[i].PU[j]=0;
			Puser[i].NUM[j]=0;
		}		
	}
	
	for (i=0;i<ITEM_NUM;i++)
	{
		for (j=0;j<MOVIE_CLUSTER_NUM;j++)
		{
			Pmovie[i].PI[j]=0;
			Pmovie[i].NUM[j]=0;		
		}			
	}
	for (i=0;i<CLUSTER_NUM;i++)
	{
		for (j=0;j<ITEM_NUM;j++)
		{
			cluster[i][j]=0;
		}
	}
	for (i=0;i<MOVIE_CLUSTER_NUM;i++)
	{
		for (j=0;j<TEST_USER_NUM;j++)
		{
			movie[i][j]=0;
		}
	}

	for (i=0;i<TEST_USER_NUM;i++)
	{
		for (j=0;j<ITEM_NUM;j++)
		{
			evaluate[i][j]=0;
		}
	}



}
// Computes RUI[i][j], the rating that user cluster i gives to movie cluster j,
// as a weighted average over the items 1..ITEM_NUM-1:
//
//     RUI[i][j] = sum_k( cluster[i][k] * Pmovie[k].PI[j] )
//               / sum_k( Pmovie[k].PI[j] )   taken over items k for which
//                                            the centre value cluster[i][k]
//                                            is non-zero (|value| > 0.1)
//
// RUI[i][j] is set to 0 when the denominator is 0. Every non-zero-denominator
// result is also echoed to cout, one value per line.
//
// Two defects of the previous version are fixed here:
//  * the "is this centre value non-zero" test indexed cluster[i] with the
//    movie-cluster index j instead of the item index k;
//  * the accumulators were reset once per user cluster only, so RUI[i][j]
//    also contained the contributions of movie clusters 0..j-1.
void getUserMovieRelation()
{
	int i,j,k;

	for (i=0;i<CLUSTER_NUM;i++)// handle each user cluster in turn
	{
		for (j=0;j<MOVIE_CLUSTER_NUM;j++)
		{
			// Fresh accumulators for every (user cluster, movie cluster) pair.
			float rate=0;
			float sum=0;

			for (k=1;k<ITEM_NUM;k++)
			{
				rate+=cluster[i][k]*Pmovie[k].PI[j];
				// Only items the cluster centre actually has a value for
				// contribute to the normalising weight.
				if (fabs(cluster[i][k])>0.1)
				{
					sum+=Pmovie[k].PI[j];
				}
			}

			if (sum)
			{
				RUI[i][j]=rate/sum;
				cout<<RUI[i][j]<<endl;
			}
			else
			{
				RUI[i][j]=0;
			}
		}
	}

	return;
}

int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
	ofstream fouts;
	ofstream foutT;
    fouts.open(staticPath,ios::out);
    foutT.open(resultsPath,ios::out);
	int nRetCode = 0;


	int cont=1;
	DWORD times =10;

	DWORD proStartTime=0;
	DWORD seconds=0;
	DWORD currentTime =0;
	DWORD startTime=0;
	int usernum;
	int itemnum;
	int rate;
	int i;

	
	int trainTimes;

	DWORD anzhsoft=GetCurrentTime()/1000;
	
	// initialize MFC and print and error on failure
	if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
	{
		// TODO: change error code to suit your needs
		cerr << _T("Fatal Error: MFC initialization failed") << endl;
		nRetCode = 1;
		return nRetCode;
	}	
	


	int testDataNum =0;
	while (testDataNum<5)
	{
		loadMemory();
		proStartTime = GetCurrentTime()/1000;
		testDataNum++;
		basefile.Format("D:\\experiment\\data\\u%d.base",testDataNum);
		testfile.Format("D:\\experiment\\data\\u%d.test",testDataNum);
	
		fouts<<"修正余弦相关相似性的算数平均"<<endl;
		fouts<<"第"<<testDataNum<<"个测试集"<<endl;
	
		startTime =GetCurrentTime()/1000;
		trainTimes=10;
			
// 		ZeroMemory(trainuser,sizeof(trainuser));
// 		ZeroMemory(movie,sizeof(movie));
// 		ZeroMemory(cluster,sizeof(cluster));
// 		ZeroMemory(movieCluster,sizeof(movieCluster));
		initial();

		// 首先对用户进行聚类
		readFile(basefile);
		

    	initialCluster();
		//测试中心节点是否赋值 
		/*
		float ff=0;
		for (j =1;j<CLUSTER_NUM;j++)
		{
			for (i=1;i<ITEM_NUM;i++)
			{
				ff+=cluster[j][i];
				if (cluster[j][i]>0)
				{
					
					cout<<cluster[j][i]<<endl;
				}
			//	if (trainuser[j][i]!=0)
				{
			//		cout<<"1"<<endl;
				}
				
			}
			
		}
		fouts<<ff<<endl;
		cout<<ff<<endl;
		break;*/
		

		
		currentTime = GetCurrentTime()/1000;
		trainTimes =50;

		foutT<<"训练user用时";
		currentTime = GetCurrentTime()/1000;
		UpdateClusterCenter(FLAG_USER,trainTimes);

		int i,j;
// 		for	(i=0;i<CLUSTER_NUM;i++)
// 		{
// 			for(j=0;j<ITEM_NUM;j++)
// 			{
// 				fouts<<cluster[i][j]<<"  ";
// 			}
// 			fouts<<endl;
// 		}

// 		fouts<<"user 每类所有的人数\n";
// 		for ( j =0;j<CLUSTER_NUM;j++)
// 		{
// 			fouts<<tempNum[j]<<endl;
// 		}
//		break;
		foutT<<GetCurrentTime()/1000-currentTime<<endl;
		foutT<<"训练movie用时";
		currentTime = GetCurrentTime()/1000;
		calculate((float**)cluster,(long**)trainuser,FLAG_USER,COS);
		

		UpdateClusterCenter(FLAG_MOVIE,trainTimes);
		foutT<<GetCurrentTime()/1000-currentTime<<endl;

// 		fouts<<"Moive"<<endl;
// 		for	(i=0;i<MOVIE_CLUSTER_NUM;i++)
// 		{
// 			for(j=0;j<TEST_USER_NUM;j++)
// 			{
// 				fouts<<movieCluster[i][j]<<"  ";
// 			}
// 			fouts<<endl;
// 		}

// 		fouts<<"movie 每类所有的总数\n";
// 		for ( j =0;j<MOVIE_CLUSTER_NUM;j++)
// 		{
// 			fouts<<tempNum[j]<<endl;
// 		}
// 		
		
		foutT<<"计算用户概率用时";
		currentTime=GetCurrentTime()/1000;
		
		
		foutT<<GetCurrentTime()/1000-currentTime<<endl;
		foutT<<"计算movie概率用时";
		currentTime = GetCurrentTime()/1000;
		
		calculate(movieCluster,movie,FLAG_MOVIE,COS);
/*		foutT<<GetCurrentTime()/1000-currentTime<<endl;
		fouts<<"******************************************user 各类的比率"<<endl;
		for (int i=0;i<CLUSTER_NUM;i++)
		{
			fouts<<PU[i]<<endl;
		}
		fouts<<"******************************************movie 各类的比率"<<endl;
		for (i=0;i<MOVIE_CLUSTER_NUM;i++)
		{
			fouts<<PI[i]<<endl;
		}
		fouts<<"******************************************用户与各类的比率"<<endl;
		for (i=0;i<CLUSTER_NUM;i++)
		{
			fouts<<Puser[190].PU[i]<<endl;
		}
		for (i=0;i<TEST_USER_NUM;i++)
		{
			for (j=0;j<CLUSTER_NUM;j++)
			{
				fouts<<Puser[i].PU[j]<<" ";
			}
			fouts<<endl;
			
		}
	  */

		getUserMovieRelation();
		fouts<<"RUI"<<endl;
		float temp;
// 		for (i=0;i<CLUSTER_NUM;i++)
// 		{
// 			temp =0;
// 			for (j=0;j<MOVIE_CLUSTER_NUM;j++)
// 			{
// 				fouts<<RUI[i][j]<<"    ";
// 				temp+=RUI[i][j];
// 			}
// 			fouts<<endl;
// 			fouts<<temp<<endl;
// 		}
		evaluateALL();
		FILE *fp=fopen(testfile,"r");
		if (!fp)
		{
			cout<<"open testfile failed";
			return nRetCode;
		}
		char tmpbuf[101];
		MAE =0;
		recomItemCount =0;
		while (!feof(fp))
		{
			fgets(tmpbuf,100,fp);
			Buf_UIR(tmpbuf,&usernum,&itemnum,&rate);
			if (usernum<usersum&&itemnum<itemsum)
			{				
				MAE += fabs(evaluate[usernum][itemnum]-rate);
				recomItemCount++;
			}
	
		}
		fclose(fp);
		MAE = MAE/recomItemCount;
		cout<<endl<<"MAE:"<<MAE<<"  推介项"<<recomItemCount<<endl;

		fouts<<endl<<"MAE:"<<MAE<<"  推介项"<<recomItemCount<<endl;
//		break;
		fouts<<"将数据聚类历时"<<currentTime-startTime<<"秒"<<endl;

		startTime = GetCurrentTime()/1000;

		fouts<<"获得各个类别的相似性历时"<<GetCurrentTime()/1000-startTime<<endl;
		
		startTime = GetCurrentTime()/1000;
		//用户相关度由高到低排序
	
		fouts<<"将用户的相似性排序总计耗时"<<GetCurrentTime()/1000-startTime<<endl;
		testfile.Format("D:\\experiment\\data\\u%d.test",testDataNum);

		startTime = GetCurrentTime()/1000;

		
		fouts<<"用户推荐总计耗时"<<GetCurrentTime()/1000-proStartTime<<endl;
	//	freeMemory();
	
	}

	

	


	
	
	


	fouts<<"程序运行总的时间"<<GetCurrentTime()/1000-anzhsoft<<endl;
	cout<<"程序运行总的时间"<<GetCurrentTime()/1000-anzhsoft<<endl;

	foutT.close();
	fouts.close();
//	cin>>i;
//	

	return nRetCode;
}
//进行一次的验证,以改更新中心节点

//计算每个用户属于每个类别的概率,每个组所占的比率
BOOL calculate(float** center,long **test,int flag,int op)
{

	int clusterNum,itemNum,totalNum,i,j;
	if (FLAG_USER==flag)
	{
		clusterNum = CLUSTER_NUM;
		itemNum = ITEM_NUM;
		totalNum = TRAIN_USER_NUM;
	}
	else if (FLAG_MOVIE == flag)
	{
		clusterNum = MOVIE_CLUSTER_NUM;
		itemNum = TRAIN_USER_NUM;
		totalNum = ITEM_NUM;
	}
	float*tempData= new float[clusterNum+1];//存储不同类别所占的概率
	float*totalData = new float[clusterNum+1];
	float temp;

	
	ZeroMemory(totalData,sizeof(totalData));

	// 首先获得每类所占的比率,顺序不能变。
	temp=0;
	for (i=0;i<clusterNum;i++)
	{
		temp+=tempNum[i];
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -