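// 📄 相似度计算_zhang.txt  (file title left by the code-viewer page; "similarity computation")
// 字号:  (font-size control label from the same page; not part of the program)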
#include <mpi.h>
#include <stdio.h>
#include <iostream>
#include <math.h>
#include <stdlib.h>
#include <vector>
#include <string>
#include <unistd.h>
#include "ZeroVector.h"
// Number of consecutively numbered files grouped into one numbered sub-directory.
// GetFileName and CreatDirs divide a 1-based file index by this value to pick
// (or create) the sub-directory.
#define FILES_PER_DIR 1000
using namespace std;
// Prototype only; the definition is not in the visible part of this file.
// MySim_M calls it as SaveVector(m, outputPath, summedVector) --
// presumably it writes the vector to that path; TODO confirm against the definition.
int SaveVector(int index, char fileName[],float xaV[]);
// Builds the path of the file numbered `index` under `base`.
//
// The result has the form "<base>/<DDDD>/<NNNNNNN>": NNNNNNN is `index`
// zero-padded to 7 digits and DDDD is the zero-padded number of the
// sub-directory holding it. FILES_PER_DIR consecutive indices share one
// directory, so indices 1..FILES_PER_DIR map to directory 0.
//
// Parameters:
//   base     - root directory (NUL-terminated); copied verbatim into the result.
//   index    - 1-based file number; values below 1 are rejected.
//   fileName - receives the path; left untouched when the call fails.
// Returns 0 on success, 1 when `index` is illegal (a message is printed to cout).
int GetFileName(char base[],long index,std::string& fileName)
{
    if(index<1)
    {
        // Streamed instead of sprintf'ed into a fixed buffer: __FILE__ can be
        // an arbitrarily long path.
        std::cout<<"Index Parameter Illegal In GetFileName! File("<<__FILE__
                 <<"), Line("<<__LINE__<<")."<<std::endl;
        return 1;
    }

    // (index-1)/FILES_PER_DIR keeps an exact multiple of FILES_PER_DIR in the
    // lower directory (e.g. index 1000 -> directory 0, index 1001 -> directory 1).
    const long dirNum=(index-1)/FILES_PER_DIR;

    // Both values are long, so they are formatted with %ld; the buffers are
    // large enough for any 64-bit value and snprintf bounds the write anyway.
    char dirPart[32];
    char filePart[32];
    snprintf(dirPart,sizeof(dirPart),"%04ld",dirNum);
    snprintf(filePart,sizeof(filePart),"%07ld",index);

    fileName.clear();
    fileName.append(base);
    fileName.append("/");
    fileName.append(dirPart);
    fileName.append("/");
    fileName.append(filePart);
    return 0;
}
//////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
// Cosine similarity of two dense vectors.
//
// Parameters:
//   n  - number of elements in each vector; elements 0..n-1 are read.
//   v1 - first vector.
//   v2 - second vector.
// Returns dot(v1,v2) / (|v1| * |v2|). When either vector has zero length
// (including n <= 0) the similarity is undefined, and 0 is returned instead
// of the NaN that 0/0 would produce.
float Cosine(int n,const float v1[],const float v2[])
{
    // Accumulate in double so that long vectors neither lose precision nor
    // overflow when the two squared norms are combined.
    double dot=0.0;
    double normSq1=0.0;
    double normSq2=0.0;

    // Zero-based: the vectors hold n elements at 0..n-1. Starting at 1 would
    // skip the first element and read one past the end.
    for(int i=0;i<n;i++)
    {
        const double a=v1[i];
        const double b=v2[i];
        dot+=a*b;
        normSq1+=a*a;
        normSq2+=b*b;
    }

    if(normSq1<=0.0||normSq2<=0.0)
    {
        return 0.0f;
    }
    return static_cast<float>(dot/(sqrt(normSq1)*sqrt(normSq2)));
}
/////////////////////////////////////////////////////////
// Ensures the directory tree needed for files numbered 1..index exists.
//
// Creates `base` if access() reports it missing, then one sub-directory
// "<base>/<DDDD>" (DDDD = zero-padded 0,1,2,...) for every started group of
// FILES_PER_DIR files. Directories that already exist are left alone.
//
// Parameters:
//   base  - root directory (NUL-terminated).
//   index - highest 1-based file number that must fit; values below 1 are rejected.
// Returns 0 on success, 1 on an illegal index or when a directory cannot be
// created (a message is printed to cout in both cases).
//
// NOTE(review): the single-argument mkdir() used here is the Windows/MinGW
// form; POSIX mkdir() takes an additional mode argument.
int CreatDirs(char base[],long index)
{
    if(index<1)
    {
        // Messages are streamed rather than sprintf'ed into a fixed buffer:
        // both __FILE__ and the caller-supplied path can be arbitrarily long.
        std::cout<<"Index Parameter Illegal In CreatDirs! File("<<__FILE__
                 <<"), Line("<<__LINE__<<")."<<std::endl;
        return 1;
    }

    // If the base directory is not available, create it.
    if(access(base,F_OK)!=0)
    {
        if(mkdir(base)!=0)
        {
            std::cout<<"Unable to create directory "<<base<<"! File("<<__FILE__
                     <<"), Line("<<__LINE__<<")."<<std::endl;
            return 1;
        }
        std::cout<<base<<" directory created."<<std::endl;
    }

    // Number of sub-directories = ceil(index / FILES_PER_DIR).
    long dirCount=index/FILES_PER_DIR;
    if(index%FILES_PER_DIR)
    {
        dirCount++;
    }

    std::string dirName;
    char suffix[32];
    for(long i=0;i<dirCount;i++)
    {
        snprintf(suffix,sizeof(suffix),"%04ld",i);
        dirName.assign(base);
        dirName.append("/");
        dirName.append(suffix);

        if(access(dirName.c_str(),F_OK)==0)
        {
            continue;   // already present
        }
        if(mkdir(dirName.c_str())!=0)
        {
            std::cout<<"Unable to create directory "<<dirName<<"! File("<<__FILE__
                     <<"), Line("<<__LINE__<<")."<<std::endl;
            return 1;
        }
        std::cout<<dirName<<" directory created."<<std::endl;
    }
    return 0;
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
int MySim_M(int m,int n, char vectorFolder[], char simFolder[])
{
/////////////////////////////////////////
int myid;
int numprocs;
MPI_Status status;
int revIndexs;
///////////////////////////////////////////////////
FILE *fp=NULL;
FILE *outfp=NULL;
int index(0);
float frequency(0);
int indcount(0);
char errMsg[100];
string filename;
string outfilename;
float *buffer;
float *recvbuf;
float *sumbuffer;
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
buffer=new float[m];
recvbuf=new float[m];
sumbuffer=new float[m];
if(CreatDirs(simFolder,n)!=0)
{
cout<<"creatdirs error~!"<<endl;
return 1;
}
for(int i=1;i<=n;i++)
{
//初始化
ZeroVector(m,buffer);
ZeroVector(m,recvbuf);
ZeroVector(m,sumbuffer);
GetFileName(vectorFolder,i,filename);
if((fp=fopen(filename.c_str(),"rb"))==NULL)
{
sprintf(errMsg,"Can not open the file! File(%s), Line(%d).",__FILE__,__LINE__);
cout<<errMsg<<endl;
//ErrorFile(errMsg,FILE_TEXT_ERR);
return 1;
}
fread(&indcount,sizeof(int),1,fp);
for(int j=1;j<=indcount;j++)
{
fread(&index,sizeof(int),1,fp);
fread(&frequency,sizeof(float),1,fp);
buffer[index-1]=frequency;
}
for(int j=1;j<numprocs;j++)
{
//Broadcast buffer
MPI_Send(buffer, m, MPI_FLOAT, j,0,MPI_COMM_WORLD);
}
// receive result;
for(int h=1;h<numprocs;h++)
{
MPI_Recv(recvbuf,m,MPI_FLOAT,h,0,MPI_COMM_WORLD,&status);
for(int j=0;j<m;j++)
{
sumbuffer[j]+=recvbuf[j];
}
}
/////////写文件
if(GetFileName(simFolder,i,outfilename)!=0)
{
cout<<"getfilename error~!"<<endl;
return 1;
}
SaveVector(m,(char*)outfilename.c_str(),sumbuffer);
fclose(fp);
}
delete[] buffer;
delete[] recvbuf;
delete[] sumbuffer;
return 0;
}
// Worker-rank half of the similarity job (ranks 1..numprocs-1).
//
// The n input files are dealt round-robin to the workers: rank r owns files
// r, r+(numprocs-1), r+2*(numprocs-1), ... up to n. The worker first loads
// all of its files into dense m-float vectors (file layout: an int record
// count followed by that many (int index, float value) pairs, each value
// stored at position index-1). It then repeats n times:
//   - receive one m-float vector from rank 0,
//   - compute Cosine() between it and every owned vector,
//   - send back an m-float reply in which slot k-1 holds the similarity to
//     owned file k and every other slot is 0.
//
// Parameters:
//   m            - length of every dense vector (must be >= 1).
//   n            - total number of files / number of exchange rounds.
//   vectorFolder - root directory of the input files.
// Returns 0 on success and 1 on any failure (a message is printed to cout).
//
// NOTE(review): after an early return rank 0 is still waiting for this
// worker's replies; the caller has to tear the job down (e.g. MPI_Abort).
int MySim_S(int m,int n, char vectorFolder[])
{
    int myid=0;
    int numprocs=0;
    MPI_Status status;
    MPI_Comm_rank(MPI_COMM_WORLD,&myid);
    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);

    // numprocs < 2 would make the round-robin stride zero (endless loop);
    // rank 0 is the master and owns no files.
    if(m<1||myid<1||numprocs<2)
    {
        std::cout<<"Parameter Illegal In MySim_S! File("<<__FILE__
                 <<"), Line("<<__LINE__<<")."<<std::endl;
        return 1;
    }

    // 1-based numbers of the files this rank owns.
    std::vector<int> vIndexs;
    for(int fileNo=myid;fileNo<=n;fileNo+=numprocs-1)
    {
        vIndexs.push_back(fileNo);
    }

    // Offsets are computed in size_t so that ownedCount*m cannot overflow int.
    const size_t dim=static_cast<size_t>(m);
    // All owned vectors back to back (vector j starts at j*dim); the
    // constructor zero-fills, and std::vector releases the storage on every
    // return path.
    std::vector<float> columns(vIndexs.size()*dim,0.0f);
    std::vector<float> buffer(dim);   // vector received from rank 0
    std::vector<float> resbuf(dim);   // reply sent back to rank 0

    std::string filename_s;
    for(size_t j=0;j<vIndexs.size();j++)
    {
        if(GetFileName(vectorFolder,vIndexs[j],filename_s)!=0)
        {
            std::cout<<"getfilename error~!"<<std::endl;
            return 1;
        }
        FILE *fp=fopen(filename_s.c_str(),"rb");
        if(fp==NULL)
        {
            std::cout<<"Can not open the file! File("<<__FILE__
                     <<"), Line("<<__LINE__<<")."<<std::endl;
            return 1;
        }

        // A short read or an index outside 1..m means the file is
        // truncated/corrupt; storing through such an index would write
        // outside this vector's slice.
        int indcount_s=0;
        bool readOk=(fread(&indcount_s,sizeof(int),1,fp)==1);
        for(int i=0;readOk&&i<indcount_s;i++)
        {
            int index_s=0;
            float frequency_s=0;
            readOk=fread(&index_s,sizeof(int),1,fp)==1
                 &&fread(&frequency_s,sizeof(float),1,fp)==1
                 &&index_s>=1&&index_s<=m;
            if(readOk)
            {
                columns[j*dim+static_cast<size_t>(index_s-1)]=frequency_s;
            }
        }
        fclose(fp);
        if(!readOk)
        {
            std::cout<<"Can not read the file "<<filename_s<<"! File("<<__FILE__
                     <<"), Line("<<__LINE__<<")."<<std::endl;
            return 1;
        }
    }

    // Receive each vector from rank 0, score it against the owned vectors and
    // send the scores back.
    for(int round=0;round<n;round++)
    {
        ZeroVector(m,&buffer[0]);
        ZeroVector(m,&resbuf[0]);
        MPI_Recv(&buffer[0],m,MPI_FLOAT,0,0,MPI_COMM_WORLD,&status);

        for(size_t j=0;j<vIndexs.size();j++)
        {
            // File k (1-based) reports into slot k-1, the same index-1
            // convention used when the sparse files are expanded. Using k
            // itself would leave slot 0 unused and write past the end for
            // k == m. Files numbered above m cannot be carried in an m-float
            // reply and are skipped.
            const int slot=vIndexs[j]-1;
            if(slot>=m)
            {
                continue;
            }
            // The owned vector is scored in place; no temporary copy needed.
            resbuf[slot]=Cosine(m,&buffer[0],&columns[j*dim]);
        }
        MPI_Send(&resbuf[0],m,MPI_FLOAT,0,0,MPI_COMM_WORLD);
    }
    return 0;
}
int SimCosine(int m,int n, char vectorFolder[], char simFolder[])
{
char processor_name[MPI_MAX_PROCESSOR_NAME];//机器名
int myid; //本机id号
int numprocs; //参与运算的机器的总数
int namelen; //机器名长度
MPI_Init(NULL,NULL);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Get_processor_name(processor_name,&namelen);
if(myid==0)
MySim_M(m,n, vectorFolder, simFolder);
else
MySim_S(m,n, vectorFolder);
MPI_Finalize();
return 0;
}
// Program entry: runs the similarity job for 10000 vectors of dimension 10000,
// reading from f:/result_x and writing to e:/outresult. The process exit
// status is whatever SimCosine returns.
int main()
{
    // SimCosine takes writable char[] parameters. Passing string literals
    // directly relies on the literal-to-char* conversion that C++11 removed,
    // so the paths are held in local arrays instead.
    char vectorFolder[]="f:/result_x";
    char simFolder[]="e:/outresult";
    return SimCosine(10000,10000,vectorFolder,simFolder);
}
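// ⌨️ 快捷键说明 (keyboard-shortcut help left by the code-viewer page; not part of the program)
// 复制代码 (copy code): Ctrl + C
// 搜索代码 (search code): Ctrl + F
// 全屏模式 (full-screen mode): F11
// 切换主题 (toggle theme): Ctrl + Shift + D
// 显示快捷键 (show shortcuts): ?
// 增大字号 (increase font size): Ctrl + =
// 减小字号 (decrease font size): Ctrl + -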