📄 pagerankc.cpp
字号:
#include <windows.h>
#include <winbase.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <hash_map>
#include <hash_set>
#include <assert.h>
#include "../FileSort/Sort.h" //一个实现快速排序的文件,后面需要排序的函数都在这个文件中
#define BLOCKSIZE (64*1024*1024)
//#define GROUP 8
#define LENGTHOFSINGLE 8
#define OUTDEGREESIZE 2048
#pragma pack(1)
using namespace std;
//实现二分查找功能
template <typename type_key>
size_t binaryFind(type_key *begin,type_key *end,const type_key &value)
{
if(begin>=end) return end-begin;
//unsigned __int64 size = sizeof(type_key);
//long len = (end-begin);
int total = (end-begin);
int low=0;
int high=total-1;
int mid=0;
unsigned __int64 temp = 0;
while(low<=high)
{
mid = (low+high)/2;
temp = *(begin+mid);
if(temp==value)
{
return mid;
}
if(temp>value)
{
high=mid-1;
continue;
}
else
{
low=mid+1;
continue;
}
}
return total;
}
//完成将DocID转换成Index的过程,以后每次迭代不用再进行查找,算是中间的辅助函数
unsigned int BuildIndex(FILE * mapFile)
{
cout<<"start to build index"<<endl;
FILE *countFile = fopen("count.dat","rb");
if (countFile==NULL)
{
cout<<"count.dat can not open,please check the file"<<endl;
return 0;
}
unsigned int total;
unsigned int MaxOutDegree;
fread(&total,4,1,countFile);
fread(&MaxOutDegree,4,1,countFile);
fclose(countFile);
cout<<"total is :\t"<<total<<"\tand the MaxOutDegree:"<<MaxOutDegree<<endl;
if (total>0)
{
unsigned __int64 *index = new unsigned __int64[total];
char *sons = new char[MaxOutDegree*(LENGTHOFSINGLE+1)];
FILE *indexMap = fopen("indexMap.dat","wb");
FILE *DocIDIndex = fopen("DocID.index","wb");
unsigned __int64 DocID;
int outDegree;
for(size_t i=0;i<total;++i)
{
fread(index+i,LENGTHOFSINGLE,1,mapFile);
fread(&outDegree,4,1,mapFile);
fread(sons,LENGTHOFSINGLE+1,outDegree,mapFile);
}
cout<<"end of read file"<<endl;
fwrite(index,LENGTHOFSINGLE,total,DocIDIndex);
rewind(mapFile);
size_t position;
for(size_t i=0;i<total;++i)
{
fread(&DocID,LENGTHOFSINGLE,1,mapFile);
fread(&outDegree,4,1,mapFile);
fread(sons,LENGTHOFSINGLE+1,outDegree,mapFile);
fwrite(&i,4,1,indexMap);
fwrite(&outDegree,4,1,indexMap);
for (int j=0;j<outDegree;++j)
{
//fread(sons,LENGTHOFSINGLE+1,1,mapFile);
DocID = *(unsigned __int64 *)(sons+j*(LENGTHOFSINGLE+1));
position = binaryFind(index,index+total,DocID);
if (position != total)
{//找到
fwrite(&position,4,1,indexMap);
}
else fwrite(&total,4,1,indexMap);
}
}
fcloseall();
delete []index;
delete []sons;
}
cout<<"over build the index"<<endl;
return total;
}
int main(int argc, char *argv[])
{
ofstream fout("log.txt");
FILE *file = NULL;
if(argc>=2)
file = fopen(argv[1],"rb");
else
file = fopen("F:\\ren\\SortMap.dat","rb");
if(file==NULL)
cout<<"--------can not open the file--------"<<endl;
unsigned int start,end;
start = GetTickCount();
size_t total=BuildIndex(file);
end = GetTickCount();
cout<<"Build index lost:\t"<<end-start<<endl;
if (total<1)
{
cout<<"BuildIndex Error and the program exit"<<endl;
return 0;
}
size_t DocID=0;
int outDegree = 0;
fclose(file);
file = fopen("indexMap.dat","rb");
FILE *DocIDIndex = fopen("DocID.Index","rb");
cout<<"start the total is:"<<total<<endl;
if(total>0)
{ //真正实现的计算过程
unsigned int * sons = new unsigned int[1];
start = GetTickCount();
float *pageRank = new float[total];
float *prTmp = new float[total];
for (size_t i=0;i<total;++i)
{
pageRank[i] = 1.0f;
prTmp[i] = 0.0f;
}
end = GetTickCount();
cout<<"end of index:"<<end-start<<endl;
ofstream fout("pagerank.txt");
float fatherRank = 1;
const float alpha = 0.85f;
size_t position = total;
size_t index=0;
for (size_t iter=0;iter<30;++iter)
{
rewind(file);
start = GetTickCount();
for (size_t i=0;i<total;++i)
{
fread(&DocID,4,1,file);
fread(&outDegree,4,1,file);
if (outDegree==0)
{
continue;
}
fatherRank = pageRank[i]/outDegree;
for (int k=0;k<outDegree;++k)
{
fread(sons,4,1,file);
if (total > *sons && *sons>=0)
{
prTmp[ *sons ] += fatherRank;
}
}
}
for (int i=0;i<total;++i)
{
prTmp[i] = 0.15f + alpha*prTmp[i];
pageRank[i] = prTmp[i];
prTmp[i] = 0.0f;
}
end = GetTickCount();
cout<<">>>>>>>>>>>>>>>"<<(iter+1)<<"\t"<<end-start<<endl;
}
delete []prTmp;
delete []sons;
cout<<"over is writing"<<endl;
FILE * PageRankList = fopen("PageRankList.dat","wb");
fwrite(pageRank,4,total,PageRankList);
delete []pageRank;
}
fout.close();
fclose(file);
fclose(DocIDIndex);
fcloseall();
cout<<"-------over----------"<<endl;
return 1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -