📄 kmeans.cpp
字号:
for (i=0;i<clusterNum;i++)
{
if (flag==FLAG_USER)
{
if (fabs(temp-0)>0.0000001)
{
PU[i]=(float)((float)tempNum[i])/temp;
}
}
else if (flag==FLAG_MOVIE)
{
if (fabs(temp-0)>0.0000001)
{
PI[i]=(float)((float)tempNum[i])/temp;
}
}
}
for (i=1;i<totalNum;i++)
{
ZeroMemory(tempData,sizeof(tempData));
temp =0;
for (j=1;j<clusterNum+1;j++)
{
if (COS == op)
{
tempData[j]=Simility(test[i],center[j-1]);
}
else if (POS == op)
{
tempData[j]=SimilityPerson(test[i],center[j-1]);
}
totalData[j]+=tempData[j];
temp+=tempData[j];
}
for (j=1;j<clusterNum+1;j++)
{
if (FLAG_USER == flag)
{
if (fabs(temp-0)>0.00000001)
{
Puser[i].PU[j-1]=tempData[j]/temp;
}
}
else if (FLAG_MOVIE == flag)
{
if (fabs(temp-0)>0.00000001)
{
Pmovie[i].PI[j-1]=tempData[j]/temp;
}
// cout<<Pmovie[i].PI[j-1]<<endl;
}
}
}
temp=0;
//计算各个类别所占的概率
//按照相似度相加,然后求所占的比率得到的结果是不行的,因为是平均的。
/* for (i=1;i<clusterNum+1;i++)
{
temp+=totalData[i];
}
if (FLAG_USER == flag)
{
for (j=1;j<clusterNum+1;j++)
{
PU[j-1]=totalData[j]/temp;
}
}
else if (FLAG_MOVIE == flag)
{
for (j=1;j<clusterNum+1;j++)
{
PI[j-1]=totalData[j]/temp;
}
}*/
delete []totalData;
delete []tempData;
return TRUE;
}
/*
 * Converts the len-character token starting at `start` to an int with atoi.
 * Returns 1 on success, 0 when the token does not fit the scratch buffer
 * (the original fixed 20-byte buffers would have been overrun in that case).
 */
static int Buf_UIR_field(const char *start, size_t len, int *out)
{
    char token[20] = {0};

    if (len >= sizeof(token))
    {
        return 0;
    }
    memcpy(token, start, len);   /* token stays NUL-terminated: len < sizeof(token) */
    *out = atoi(token);
    return 1;
}

/*
 * Parses one record line of the form "<user> <item> <rate>[ <anything>]".
 *
 * Fields are separated by a single space character. The rate field ends at
 * the next space, at a line terminator, or at the end of the string, so a
 * record whose rate is the last token on the line is accepted.
 *
 *   buf   - NUL-terminated line to parse (not modified)
 *   user  - receives the first field
 *   item  - receives the second field
 *   rate  - receives the third field
 *
 * Returns 1 when all three fields were extracted. Returns 0 when an argument
 * is NULL, a separator is missing, or a field is 20 characters or longer; in
 * that case *user, *item and *rate are set to 0 (when the pointers are
 * non-NULL) so callers that ignore the return value never read garbage.
 */
int Buf_UIR(char* buf,int *user, int *item, int *rate)
{
    const char *userEnd;
    const char *itemStart;
    const char *itemEnd;
    const char *rateStart;
    size_t rateLen;
    int parsedUser = 0;
    int parsedItem = 0;
    int parsedRate = 0;

    if (buf == NULL || user == NULL || item == NULL || rate == NULL)
    {
        return 0;
    }
    *user = 0;
    *item = 0;
    *rate = 0;

    /* user id: everything before the first space */
    userEnd = strchr(buf, ' ');
    if (userEnd == NULL)
    {
        return 0;
    }

    /* item id: between the first and the second space */
    itemStart = userEnd + 1;
    itemEnd = strchr(itemStart, ' ');
    if (itemEnd == NULL)
    {
        return 0;
    }

    /* rate: up to the next space or the end of the line */
    rateStart = itemEnd + 1;
    rateLen = strcspn(rateStart, " \r\n");

    if (!Buf_UIR_field(buf, (size_t)(userEnd - buf), &parsedUser) ||
        !Buf_UIR_field(itemStart, (size_t)(itemEnd - itemStart), &parsedItem) ||
        !Buf_UIR_field(rateStart, rateLen, &parsedRate))
    {
        return 0;
    }

    *user = parsedUser;
    *item = parsedItem;
    *rate = parsedRate;
    return 1;
}
/*
 * Computes a correlation-style similarity between a rating vector Ua and a
 * centre vector Ub.
 *
 * Indices 1 .. itemsum-1 of both arrays are read; index 0 is ignored. An
 * entry counts as "rated" when it is greater than 0.
 *
 *   1. The mean of each vector is taken over all of its own rated entries
 *      (not only the entries rated in both vectors).
 *   2. Over the entries rated in BOTH vectors, the squared deviations from
 *      those means and the absolute value of the deviation products are
 *      accumulated.
 *   3. The result is sum|dA*dB| / (sqrt(sum dA^2) * sqrt(sum dB^2)).
 *
 * Because step 2 sums absolute values, the result is non-negative; it is not
 * a signed Pearson coefficient. This matches the previous behaviour and is
 * kept deliberately.
 *
 * Returns 0 when either deviation sum is not positive (no common entries, or
 * every common entry sits exactly on the mean).
 *
 * Fixes relative to the previous version: the means were computed with
 * integer division and the float centre values were accumulated into an int,
 * both of which truncated the averages. The per-call index array of itemsum
 * ints on the stack is gone; a second pass over the vectors replaces it and
 * visits the common entries in the same order.
 */
float Simility(long* Ua, float*Ub)
{
    long sumUa = 0;        /* integer ratings: exact sum */
    double sumUb = 0.0;    /* float centre values: keep the fractional part */
    int countUa = 0;
    int countUb = 0;
    float aveUa = 0.0f;
    float aveUb = 0.0f;
    float SumRelativeA = 0.0f;
    float SumRelativeB = 0.0f;
    float SumRelativeAB = 0.0f;
    int i;

    /* Pass 1: per-vector means over each vector's own rated entries. */
    for (i = 1; i < itemsum; i++)
    {
        if (Ua[i] > 0)
        {
            sumUa += Ua[i];
            countUa++;
        }
        if (Ub[i] > 0)
        {
            sumUb += Ub[i];
            countUb++;
        }
    }
    if (countUa != 0)
    {
        aveUa = (float)sumUa / countUa;
    }
    if (countUb != 0)
    {
        aveUb = (float)(sumUb / countUb);
    }

    /* Pass 2: deviation sums over the entries rated in both vectors. */
    for (i = 1; i < itemsum; i++)
    {
        float devA;
        float devB;

        if (!(Ua[i] > 0 && Ub[i] > 0))
        {
            continue;
        }
        devA = Ua[i] - aveUa;
        devB = Ub[i] - aveUb;
        SumRelativeA += devA * devA;
        SumRelativeB += devB * devB;
        SumRelativeAB += fabs(devA * devB);
    }

    if (SumRelativeA <= 0 || SumRelativeB <= 0)
    {
        return 0;
    }
    return (float)(SumRelativeAB / (sqrt(SumRelativeA) * sqrt(SumRelativeB)));
}
/*
 * Computes the absolute value of the Pearson correlation coefficient between
 * a rating vector Ua and a centre vector Ub, restricted to the entries that
 * are greater than 0 in both vectors.
 *
 * Indices 1 .. itemsum-1 of both arrays are read; index 0 is ignored.
 *
 * With n common entries the single-pass formula is
 *     num  = sum(a*b) - sum(a)*sum(b)/n
 *     denA = sum(a*a) - sum(a)^2/n
 *     denB = sum(b*b) - sum(b)^2/n
 *     result = |num / sqrt(denA * denB)|
 *
 * Returns 0 when there are no common entries or when either denominator term
 * is not positive (one of the vectors is constant over the common entries).
 *
 * Fixes relative to the previous version: with no common entries the code
 * performed an integer division by zero; the sums were kept in int, so the
 * float centre values and their products were truncated and the divisions by
 * n were integer divisions; a non-positive denominator reached sqrt/division
 * and produced NaN or infinity. The index array that was filled but never
 * read has been removed.
 */
float SimilityPerson(long* Ua, float*Ub)
{
    int comItem = 0;          /* number of entries rated in both vectors */
    double sumUa = 0.0;
    double sumUb = 0.0;
    double sumUaUb = 0.0;     /* sum of (Ua*Ub) */
    double sumUaUa = 0.0;     /* sum of (Ua*Ua) */
    double sumUbUb = 0.0;     /* sum of (Ub*Ub) */
    double numeratorPerson;
    double denominatorPersonUa;
    double denominatorPersonUb;
    int i;

    for (i = 1; i < itemsum; i++)
    {
        if (Ua[i] > 0 && Ub[i] > 0)
        {
            const double a = (double)Ua[i];
            const double b = (double)Ub[i];

            sumUa += a;
            sumUb += b;
            sumUaUb += a * b;
            sumUaUa += a * a;
            sumUbUb += b * b;
            comItem++;
        }
    }

    if (comItem == 0)
    {
        return 0;
    }

    numeratorPerson = sumUaUb - (sumUa * sumUb) / comItem;
    denominatorPersonUa = sumUaUa - (sumUa * sumUa) / comItem;
    denominatorPersonUb = sumUbUb - (sumUb * sumUb) / comItem;

    /* A constant vector has zero variance; the correlation is undefined. */
    if (denominatorPersonUa <= 0 || denominatorPersonUb <= 0)
    {
        return 0;
    }

    return (float)fabs(numeratorPerson / sqrt(denominatorPersonUa * denominatorPersonUb));
}
/*
 * Runs trainTimes rounds of centre re-estimation for either the user
 * clustering or the movie clustering.
 *
 *   flag       - FLAG_USER: rows come from trainuser, centres are `cluster`
 *                FLAG_MOVIE: rows come from movie, centres are `movieCluster`
 *   trainTimes - number of rounds; values <= 0 perform no round
 *
 * Each round:
 *   1. every row 1 .. totalNum-1 is assigned to the centre with the highest
 *      Simility() score (ties resolved by maxValue);
 *   2. per centre and per column, the row values are summed into tempCluster
 *      and the number of non-zero contributions is counted in tempNum1;
 *      tempNum counts the rows assigned to each centre;
 *   3. every centre column becomes the sum divided by the non-zero count, or
 *      0 when no assigned row had a non-zero value in that column.
 *
 * Returns FALSE in every case, exactly like the previous version (callers
 * cannot use the return value to detect success).
 * NOTE(review): returning FALSE after a successful run looks unintended --
 * confirm against the callers before changing it.
 *
 * Fix relative to the previous version: an unrecognised flag left
 * clusterNum/itemNum/totalNum uninitialised and then used them as loop
 * bounds; such a flag is now rejected before any round runs. The flag test
 * has also been hoisted out of the inner loops.
 *
 * NOTE(review): the ZeroMemory calls pass element counts, not byte counts
 * (no sizeof factor). The declarations of tempCluster, tempData, tempNum and
 * tempNum1 are not visible here, so the sizes are kept as they were; verify
 * that they actually clear the whole buffers.
 */
BOOL UpdateClusterCenter(int flag,int trainTimes)
{
    int i,j,k,n;
    int clusterNum,itemNum,totalNum;
    long **rows;        /* vectors being clustered */
    float **centers;    /* centres being updated   */
    int cont=0;

    ZeroMemory(tempCluster,ITEM_NUM*ITEM_NUM);
    ZeroMemory(tempData,MAX_CLUSTER_NUM);
    ZeroMemory(tempNum,MAX_CLUSTER_NUM);

    if (FLAG_USER==flag)
    {
        clusterNum = CLUSTER_NUM;
        itemNum = ITEM_NUM;
        totalNum = TRAIN_USER_NUM;
        rows = trainuser;
        centers = cluster;
    }
    else if (FLAG_MOVIE == flag)
    {
        clusterNum = MOVIE_CLUSTER_NUM;
        itemNum = TRAIN_USER_NUM;
        totalNum = ITEM_NUM;
        rows = movie;
        centers = movieCluster;
    }
    else
    {
        /* Unknown mode: there is nothing meaningful to iterate over. */
        return FALSE;
    }

    while (cont++<trainTimes)
    {
        ZeroMemory(tempCluster,ITEM_NUM*ITEM_NUM);
        ZeroMemory(tempData,MAX_CLUSTER_NUM);
        ZeroMemory(tempNum,MAX_CLUSTER_NUM);
        ZeroMemory(tempNum1,MAX_CLUSTER_NUM*ITEM_NUM);

        /* Assignment step: row 0 is skipped, ids start at 1. */
        for (i=1;i<totalNum;i++)
        {
            for (j=0;j<clusterNum;j++)
            {
                tempData[j]=Simility(rows[i],centers[j]);
            }
            k=maxValue(tempData,clusterNum);   /* index of the closest centre */

            for (n=0;n<itemNum;n++)
            {
                tempCluster[k][n]+=rows[i][n];
                if (rows[i][n]!=0)
                {
                    tempNum1[k][n]++;
                }
            }
            tempNum[k]++;
        }

        /* Update step: mean over the non-zero contributions per column. */
        for (j=0;j<clusterNum;j++)
        {
            for (n=0;n<itemNum;n++)
            {
                if (tempNum1[j][n]!=0)
                {
                    tempCluster[j][n]/=tempNum1[j][n];
                }
                else
                {
                    tempCluster[j][n]=0;
                }
                centers[j][n]=tempCluster[j][n];
            }
        }
    }
    return FALSE;
}
/*
 * Loads the rating file at `path` into the two global rating tables.
 *
 * Every line is parsed with Buf_UIR as "<user> <item> <rate>"; an accepted
 * record is stored twice: trainuser[user][item] and movie[item][user].
 * Records whose user is outside [0, TRAIN_USER_NUM) or whose item is outside
 * [0, itemsum) are ignored, as are lines Buf_UIR reports as unparsable.
 *
 * Before reading, rows 0 .. TRAIN_USER_NUM-1 of trainuser (ITEM_NUM columns)
 * and rows 0 .. ITEM_NUM-1 of movie (TRAIN_USER_NUM columns) are cleared.
 *
 * Returns FALSE (after printing a message) when the file cannot be opened,
 * TRUE otherwise.
 *
 * Fixes relative to the previous version:
 *   - memset(trainuser,0,sizeof(trainuser)) and ZeroMemory(movie,sizeof(movie))
 *     were applied to pointer-to-pointer tables (loadMemory allocates them
 *     with malloc), so they overwrote the row-0 pointer instead of clearing
 *     ratings. They are removed and row 0 is cleared by the loops instead.
 *   - The `while (!feof(fp))` loop ignored the result of fgets, so the last
 *     line was processed a second time and an empty file parsed an
 *     uninitialised buffer. The loop is now driven by fgets itself.
 *   - The result of Buf_UIR is checked, and negative indices are rejected.
 *
 * NOTE(review): loadMemory sizes these tables with TEST_USER_NUM while this
 * function indexes them with TRAIN_USER_NUM and itemsum -- confirm that
 * TRAIN_USER_NUM <= TEST_USER_NUM and itemsum <= ITEM_NUM.
 */
BOOL readFile(CString path)
{
    char tmpbuf[100];   /* one line of the input file */
    int usernum;
    int itemnum;
    int rate;
    int i,j;
    FILE * fp;

    fp=fopen(path,"r");
    if (!fp)
    {
        cout<<"open basefile failed";
        return FALSE;
    }

    /* Clear the user x item table. */
    for (i=0;i<TRAIN_USER_NUM;i++)
    {
        for (j=0;j<ITEM_NUM;j++)
        {
            trainuser[i][j]=0;
        }
    }
    /* Clear the item x user table. */
    for (i=0;i<ITEM_NUM;i++)
    {
        for (j=0;j<TRAIN_USER_NUM;j++)
        {
            movie[i][j]=0;
        }
    }

    while (fgets(tmpbuf,sizeof(tmpbuf),fp) != NULL)
    {
        if (!Buf_UIR(tmpbuf,&usernum,&itemnum,&rate))
        {
            continue;   /* malformed line */
        }
        if (usernum>=0&&usernum<TRAIN_USER_NUM&&itemnum>=0&&itemnum<itemsum)
        {
            trainuser[usernum][itemnum] = rate;
            movie[itemnum][usernum] = rate;
        }
    }
    fclose(fp);

    /* Diagnostic kept from the previous version: print any negative rating
       found in rows 1 .. CLUSTER_NUM-1, columns 1 .. ITEM_NUM-2. */
    for (j =1;j<CLUSTER_NUM;j++)
    {
        for (i=1;i<ITEM_NUM-1;i++)
        {
            if (trainuser[j][i]<0)
            {
                cout<<trainuser[j][i]<<endl;
            }
        }
    }
    return TRUE;
}
/*
 * Returns the index of the largest element of data[0 .. length-1].
 *
 * When several elements share the largest value, the lowest index wins.
 * Elements that are NaN are skipped. Returns 0 when `data` is null, when
 * `length` is not positive, or when every element is NaN, so the result is
 * always usable as an index by callers that do not check it.
 *
 * Fix relative to the previous version: the result variable was left
 * uninitialised when length <= 0 or when no element exceeded the hard-coded
 * starting value of -100, so an indeterminate index was returned.
 */
int maxValue(float data[],int length)
{
    int best = -1;   /* -1 until the first comparable element is seen */
    int i;

    if (!data || length <= 0)
    {
        return 0;
    }
    for (i = 0; i < length; i++)
    {
        if (data[i] != data[i])
        {
            continue;   /* NaN never compares greater; skip it explicitly */
        }
        if (best < 0 || data[i] > data[best])
        {
            best = i;
        }
    }
    return best < 0 ? 0 : best;
}
/*
 * Seeds both sets of centres from the first data rows.
 *
 * User centre c (0 .. CLUSTER_NUM-1) copies columns 1 .. ITEM_NUM-1 of
 * trainuser[c+1]; "aa" is printed for every copied value that is negative.
 * Movie centre c (0 .. MOVIE_CLUSTER_NUM-1) copies columns
 * 1 .. TEST_USER_NUM-1 of movie[c+1].
 * Column 0 of every centre is left untouched.
 */
void initialCluster()
{
    int c;
    int col;

    /* user centres */
    for (c = 0; c < CLUSTER_NUM; ++c)
    {
        float *center = cluster[c];
        long *seed = trainuser[c + 1];

        for (col = 1; col < ITEM_NUM; ++col)
        {
            center[col] = seed[col];
            if (center[col] < 0)
            {
                cout<<"aa"<<endl;
            }
        }
    }

    /* movie centres */
    for (c = 0; c < MOVIE_CLUSTER_NUM; ++c)
    {
        float *center = movieCluster[c];
        long *seed = movie[c + 1];

        for (col = 1; col < TEST_USER_NUM; ++col)
        {
            center[col] = seed[col];
        }
    }
}
/*
 * calloc wrapper used by loadMemory: returns zero-filled storage for `count`
 * elements of `elemSize` bytes, or terminates the program with a message
 * when the allocation fails.
 */
static void *allocZeroedOrDie(size_t count, size_t elemSize)
{
    void *p = calloc(count, elemSize);

    /* calloc may legitimately return NULL for a zero-sized request. */
    if (p == NULL && count != 0 && elemSize != 0)
    {
        fprintf(stderr, "loadMemory: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return p;
}

/*
 * Allocates the four global tables:
 *   trainuser    - TEST_USER_NUM rows     x ITEM_NUM      longs
 *   movie        - ITEM_NUM rows          x TEST_USER_NUM longs
 *   cluster      - CLUSTER_NUM rows       x ITEM_NUM      floats
 *   movieCluster - MOVIE_CLUSTER_NUM rows x TEST_USER_NUM floats
 *
 * Changes relative to the previous version: every allocation is checked (the
 * program exits with a message instead of dereferencing NULL later), and the
 * storage is zero-filled, because other functions read cells they never
 * write (for example column 0 of the centres).
 *
 * NOTE(review): other functions index these tables with TRAIN_USER_NUM and
 * itemsum; confirm those never exceed the dimensions allocated here.
 */
void loadMemory()
{
    int i;

    trainuser = (long **)allocZeroedOrDie((size_t)TEST_USER_NUM, sizeof(long *));
    for (i=0;i<TEST_USER_NUM;i++)
    {
        trainuser[i]=(long *)allocZeroedOrDie((size_t)ITEM_NUM, sizeof(long));
    }

    movie = (long **)allocZeroedOrDie((size_t)ITEM_NUM, sizeof(long *));
    for (i=0;i<ITEM_NUM;i++)
    {
        movie[i]=(long *)allocZeroedOrDie((size_t)TEST_USER_NUM, sizeof(long));
    }

    cluster = (float **)allocZeroedOrDie((size_t)CLUSTER_NUM, sizeof(float *));
    for (i=0;i<CLUSTER_NUM;i++)
    {
        cluster[i]=(float *)allocZeroedOrDie((size_t)ITEM_NUM, sizeof(float));
    }

    movieCluster = (float **)allocZeroedOrDie((size_t)MOVIE_CLUSTER_NUM, sizeof(float *));
    for (i=0;i<MOVIE_CLUSTER_NUM;i++)
    {
        movieCluster[i]=(float *)allocZeroedOrDie((size_t)TEST_USER_NUM, sizeof(float));
    }
}
void freeMemory()
{
/*
malloc分配的内存空间,如果越界,free时就会产生这样的错误。DAMAGE:after Normal block(#41) at 0x009207A8 */
int i;
i=0;
free(trainuser);
free(movie);
free(cluster);
free(movieCluster);
/*for (i=0;i<TEST_USER_NUM;i++)
{
free(trainuser[i]);
}
free(trainuser);
for (i=0;i<ITEM_NUM;i++)
{
free(movie[i]);
}
free(movie);
for (i=0;i<CLUSTER_NUM;i++)
{
free(cluster[i]);
}
free(cluster);
for (i=0;i<MOVIE_CLUSTER_NUM;i++)
{
free(movieCluster[i]);
}
free(movieCluster);*/
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -