// priorizer.cpp
#include "stdafx.h"
#include "Priorizer.h"
#include "BloomFilter.h"
#include <time.h>
#include <WinInet.h>
//////////////////////////////////////////////////////////////////////////
// Handle of the task database used by every CPriorizer operation. It stays
// INVALID_FDBHANDLE until Init() opens the database and is reset by Fini().
FDBHANDLE CPriorizer::s_hDbTask = INVALID_FDBHANDLE;
// Running count of Push/Pop operations; Push and Pop call fdb_sync() whenever
// this count is a multiple of 20.
long CPriorizer::s_uOpCount = 0;
//////////////////////////////////////////////////////////////////////////
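// Link attributes are not examined here; this code only performs the
// "is it due yet" check. Attribute checks and the "seen" test are left to the caller.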
void CPriorizer::Push(const CPageStat& objPage)
{
if(s_hDbTask==INVALID_FDBHANDLE)
return;
//
//对于被动模式,不计算到点,外部计算是否seen;都设置bitmap
//主动的,计算到点的,就设置bitmap;外部计算是否seen(上次没有爬完的和主动的会冲突所以要算)
//seen判断也有外部来做
//
if (!objPage.m_bPassiveMode)
{
__int64 nowTime;
_time64( &nowTime ); //秒钟
if(nowTime-objPage.m_int64LastModifyTime < (1 << objPage.m_wPriority)*60)
{
return;
}
}
CBloomFilter::Instance()->insert(objPage.m_strUrl);
/*
CString m_strUrl;
CString m_strRefer;
DWORD m_dwContentFingerprint;
WORD m_wPriority;
__int64 m_int64LastModifyTime;
__int64 m_int64LastCrawlTime;
WORD m_wTotalCrawlCount;
WORD m_wHintCount;
BYTE m_bPassiveMode; //被动模式
*/
FITHANDLE key,value;
key=fdb_getitem();
value=fdb_getitem();
fdb_writeb(key,objPage.m_strUrl,objPage.m_strUrl.GetLength());
fdb_writeb(value,objPage.m_strRefer,objPage.m_strRefer.GetLength());
fdb_write4(value,objPage.m_dwContentFingerprint);
fdb_write2(value,objPage.m_wPriority);
fdb_write8(value,objPage.m_int64LastModifyTime);
fdb_write8(value,objPage.m_int64LastCrawlTime);
fdb_write2(value,objPage.m_wTotalCrawlCount);
fdb_write2(value,objPage.m_wHintCount);
fdb_write1(value,objPage.m_bPassiveMode);
fdb_put(s_hDbTask,key,value);
fdb_putitem(key);
fdb_putitem(value);
::InterlockedIncrement(& s_uOpCount);
if(s_uOpCount%20==0)
{
fdb_sync(s_hDbTask);
}
}
// Pops one record from the task database via fdb_tpop() and deserializes it
// into objPage.
//
// objPage: receives the URL (from the record key) and the remaining page
//          fields (from the record value).
// Returns true when a record was popped and decoded; false when the database
// is not open or fdb_tpop() reports failure.
bool CPriorizer::Pop(CPageStat& objPage)
{
    if (s_hDbTask == INVALID_FDBHANDLE)
        return false;

    /*
    Record layout. Key: m_strUrl. Value, in read order:
        CString  m_strRefer;
        DWORD    m_dwContentFingerprint;
        WORD     m_wPriority;
        __int64  m_int64LastModifyTime;
        __int64  m_int64LastCrawlTime;
        WORD     m_wTotalCrawlCount;
        WORD     m_wHintCount;
        BYTE     m_bPassiveMode;   // passive mode
    This mirrors the order in which Push() writes the fields.
    */
    FITHANDLE key = fdb_getitem();
    FITHANDLE value = fdb_getitem();
    if (!fdb_tpop(s_hDbTask, key, value))
    {
        fdb_putitem(key);
        fdb_putitem(value);
        return false;
    }

    // One extra byte so the terminator still fits when a stored string fills
    // all INTERNET_MAX_PATH_LENGTH bytes; the original wrote buffer[length]
    // one past the end of the array in that case.
    char buffer[INTERNET_MAX_PATH_LENGTH + 1] = {0};
    WORD length = INTERNET_MAX_PATH_LENGTH;

    // NOTE(review): fdb_readb presumably treats `length` as in/out (capacity
    // in, bytes read out) - confirm against the fdb API. The clamp keeps the
    // terminator write in bounds either way.
    fdb_readb(key, buffer, length);
    if (length > INTERNET_MAX_PATH_LENGTH)
        length = INTERNET_MAX_PATH_LENGTH;
    buffer[length] = 0;
    objPage.m_strUrl = buffer;

    length = INTERNET_MAX_PATH_LENGTH;
    fdb_readb(value, buffer, length);
    if (length > INTERNET_MAX_PATH_LENGTH)
        length = INTERNET_MAX_PATH_LENGTH;
    buffer[length] = 0;
    objPage.m_strRefer = buffer;

    fdb_read4(value, objPage.m_dwContentFingerprint);
    fdb_read2(value, objPage.m_wPriority);
    fdb_read8(value, objPage.m_int64LastModifyTime);
    fdb_read8(value, objPage.m_int64LastCrawlTime);
    fdb_read2(value, objPage.m_wTotalCrawlCount);
    fdb_read2(value, objPage.m_wHintCount);
    fdb_read1(value, objPage.m_bPassiveMode);

    fdb_putitem(key);
    fdb_putitem(value);

    // Use the value returned by InterlockedIncrement rather than re-reading
    // s_uOpCount: another thread may bump the counter in between, which would
    // make the every-20th-operation sync fire twice or not at all.
    const long opCount = ::InterlockedIncrement(&s_uOpCount);
    if (opCount % 20 == 0)
    {
        fdb_sync(s_hDbTask);
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////
void CPriorizer::Init()
{
if(s_hDbTask!=INVALID_FDBHANDLE)
return;
CString strHost;
strHost.Format("%d",g_nSid);
s_hDbTask = fdb_openrw(ST_PAGETASK,strHost);
}
// Closes the task database if it is open and resets the shared handle to
// INVALID_FDBHANDLE, so that later Push/Pop/Load calls return early.
void CPriorizer::Fini()
{
    if (s_hDbTask != INVALID_FDBHANDLE)
    {
        fdb_close(s_hDbTask);
        s_hDbTask = INVALID_FDBHANDLE;
    }
}
void CPriorizer::Load()
{
if(s_hDbTask==INVALID_FDBHANDLE)
return;
FITHANDLE key,value;
CPageStat objPage;
char buffer[INTERNET_MAX_PATH_LENGTH] ={0};
WORD length = INTERNET_MAX_PATH_LENGTH;
key=fdb_getitem();
value=fdb_getitem();
FDBHANDLE cursor = fdb_tfirst(s_hDbTask,key,value);
if(cursor!=INVALID_FDBHANDLE)
{
do
{
length = INTERNET_MAX_PATH_LENGTH;
fdb_readb(key,buffer,length);
buffer[length]=0;
CBloomFilter::Instance()->insert(buffer); //最早的地方所以不用判断是否seen
fdb_resetitem(key);
fdb_resetitem(value);
} while(fdb_tnext(s_hDbTask,cursor,key,value));
fdb_tclose(s_hDbTask,cursor);
}
fdb_putitem(key);
fdb_putitem(value);
}
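// (Residue from the web code viewer this file was copied from; not part of the source.)
// Keyboard shortcuts:
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Full-screen mode:    F11
//   Toggle theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -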