// File: pagestat.cpp
// (Code-viewer page header; the viewer's "Font size:" control label was captured here.)
#include "stdafx.h"
#include "PageStat.h"
#include "Template.h"
#include "Priorizer.h"
#include <WinInet.h>
#include "BloomFilter.h"
//////////////////////////////////////////////////////////////////////////
// Handle of the page-stat database. It holds INVALID_FDBHANDLE until Init()
// stores the result of fdb_openrw in it, and is reset to INVALID_FDBHANDLE by Fini().
FDBHANDLE CPageStat::s_hDbStat = INVALID_FDBHANDLE;
// Operation counter bumped with InterlockedIncrement by Del(), Put() and a
// successful Get(); those functions call fdb_sync when it is a multiple of 20.
long CPageStat::s_uOpCount = 0;
//////////////////////////////////////////////////////////////////////////
// Initialises objPage as a brand-new record for lpszUrl / lpszRefer.
// (Original note: passive-mode pages are computed directly by this function;
// during the first crawl every page is in passive mode.)
//
// The fingerprint, both timestamps and both counters are zeroed, and the
// passive-mode flag is copied from bPassiveMode. The recommended base priority
// is then picked from dwType, with TT_PARSE checked before TT_STORE.
//
// Returns true when a priority was assigned; false when dwType carries neither
// TT_PARSE nor TT_STORE (objPage has still been reset, with priority 0).
bool CPageStat::Build(CPageStat& objPage,LPCSTR lpszUrl,LPCSTR lpszRefer,bool bPassiveMode,DWORD dwType)
{
    // Caller-supplied identity and mode.
    objPage.m_strUrl = lpszUrl;
    objPage.m_strRefer = lpszRefer;
    objPage.m_bPassiveMode = bPassiveMode;

    // Crawl history starts out empty.
    objPage.m_dwContentFingerprint = 0;
    objPage.m_int64LastModifyTime = 0;
    objPage.m_int64LastCrawlTime = 0;
    objPage.m_wTotalCrawlCount = 0;
    objPage.m_wHintCount = 0;
    objPage.m_wPriority = 0;

    // Recommended priority: parse tasks are tested first, then store tasks.
    if(dwType & TT_PARSE)
    {
        objPage.m_wPriority = _PRIORITY_BASE_PARSE_;
        return true;
    }
    if(dwType & TT_STORE)
    {
        objPage.m_wPriority = _PRIORITY_BASE_STORE_;
        return true;
    }

    // Neither task flag present: nothing to recommend.
    return false;
}
//////////////////////////////////////////////////////////////////////////
void CPageStat::Del(const CPageStat& objPage)
{
if(s_hDbStat==INVALID_FDBHANDLE)
return;
FITHANDLE key;
key = fdb_getitem();
fdb_writeb(key,objPage.m_strUrl,objPage.m_strUrl.GetLength());
fdb_del(s_hDbStat,key);
fdb_putitem(key);
::InterlockedIncrement(& s_uOpCount);
if(s_uOpCount%20==0)
{
fdb_sync(s_hDbStat);
}
}
// Rebuilds objPage from a stored key/value item pair.
//   key          - item whose blob is the page URL.
//   value        - item holding, in this order: referrer blob, 4-byte content
//                  fingerprint, 2-byte priority, 8-byte last-modify time,
//                  8-byte last-crawl time, 2-byte total crawl count and
//                  2-byte hint count (the same order Put() writes them in).
//   bPassiveMode - copied into objPage.m_bPassiveMode; it is not stored.
void CPageStat::Build(CPageStat& objPage,FITHANDLE key,FITHANDLE value,bool bPassiveMode)
{
    // One byte more than the largest blob requested, so that writing the
    // terminator at buffer[length] stays in bounds even when length equals
    // INTERNET_MAX_PATH_LENGTH. The previous buffer had exactly
    // INTERNET_MAX_PATH_LENGTH bytes and could be overrun by one byte.
    char buffer[INTERNET_MAX_PATH_LENGTH + 1] = {0};

    // NOTE(review): length is presumably an in/out argument that fdb_readb
    // updates to the number of bytes actually read - confirm against the fdb API.
    WORD length = INTERNET_MAX_PATH_LENGTH;
    fdb_readb(key,buffer,length);
    if(length > INTERNET_MAX_PATH_LENGTH)
        length = INTERNET_MAX_PATH_LENGTH;   // defensive clamp before indexing
    buffer[length]=0;
    objPage.m_strUrl=buffer;

    // Reset the capacity before reading the referrer blob.
    length = INTERNET_MAX_PATH_LENGTH;
    fdb_readb(value,buffer,length);
    if(length > INTERNET_MAX_PATH_LENGTH)
        length = INTERNET_MAX_PATH_LENGTH;   // defensive clamp before indexing
    buffer[length]=0;
    objPage.m_strRefer=buffer;

    // Fixed-width fields follow the referrer blob.
    fdb_read4(value,objPage.m_dwContentFingerprint);
    fdb_read2(value,objPage.m_wPriority);
    fdb_read8(value,objPage.m_int64LastModifyTime);
    fdb_read8(value,objPage.m_int64LastCrawlTime);
    fdb_read2(value,objPage.m_wTotalCrawlCount);
    fdb_read2(value,objPage.m_wHintCount);

    objPage.m_bPassiveMode = bPassiveMode;
}
// Looks up the stored record whose key is objPage.m_strUrl.
// On a hit, objPage is overwritten from the stored key/value via Build() with
// the passive-mode flag set to true, the operation counter is bumped, and
// fdb_sync is invoked on every 20th counted operation.
// Returns true on a hit; false when the database is not open or fdb_get fails.
bool CPageStat::Get(CPageStat& objPage)
{
    if(s_hDbStat==INVALID_FDBHANDLE)
        return false;

    bool bResult = false;
    FITHANDLE key = fdb_getitem();
    FITHANDLE value = fdb_getitem();

    // The key item holds the raw URL bytes (no terminator is written).
    fdb_writeb(key,objPage.m_strUrl,objPage.m_strUrl.GetLength());
    if(fdb_get(s_hDbStat,key,value))
    {
        Build(objPage,key,value,true);
        bResult=true;

        // Test the value returned by InterlockedIncrement rather than
        // re-reading s_uOpCount: a separate read is not atomic with the
        // increment, so a concurrent increment could make the multiple-of-20
        // sync be skipped or performed twice.
        const long lOpCount = ::InterlockedIncrement(&s_uOpCount);
        if(lOpCount%20==0)
        {
            fdb_sync(s_hDbStat);
        }
    }

    // Items are handed back on both the hit and the miss path.
    fdb_putitem(key);
    fdb_putitem(value);
    return bResult;
}
//去一条数据看看是否能取到来测试是否为空
bool CPageStat::IsEmpty()
{
if(s_hDbStat==INVALID_FDBHANDLE)
return true;
bool bResult = true;
FITHANDLE key,value;
key = fdb_getitem();
value = fdb_getitem();
FDBHANDLE cursor = fdb_tfirst(s_hDbStat,key,value);
if(cursor!=INVALID_FDBHANDLE)
{
bResult=false;
fdb_tclose(s_hDbStat,cursor);
}
fdb_putitem(key);
fdb_putitem(value);
return bResult;
}
//注意被动模式的合并,被动和主动模式的合并,应该是业务层面自己的事情
//对于主动模式的,自身携带信息,在下载完毕后修改状态调用put
//对于被动模式的,自身开始时候没有携带信息,所以要先调用get,然后修改相关状态并调用put
void CPageStat::Put(const CPageStat& objPage)
{
if(s_hDbStat==INVALID_FDBHANDLE)
return;
/*
CString m_strUrl;
CString m_strRefer;
DWORD m_dwContentFingerprint;
WORD m_wPriority;
__int64 m_int64LastModifyTime;
__int64 m_int64LastCrawlTime;
WORD m_wTotalCrawlCount;
WORD m_wHintCount;
*/
FITHANDLE key,value;
key=fdb_getitem();
value=fdb_getitem();
fdb_writeb(key,objPage.m_strUrl,objPage.m_strUrl.GetLength());
fdb_writeb(value,objPage.m_strRefer,objPage.m_strRefer.GetLength());
fdb_write4(value,objPage.m_dwContentFingerprint);
fdb_write2(value,objPage.m_wPriority);
fdb_write8(value,objPage.m_int64LastModifyTime);
fdb_write8(value,objPage.m_int64LastCrawlTime);
fdb_write2(value,objPage.m_wTotalCrawlCount);
fdb_write2(value,objPage.m_wHintCount);
fdb_put(s_hDbStat,key,value);
fdb_putitem(key);
fdb_putitem(value);
::InterlockedIncrement(& s_uOpCount);
if(s_uOpCount%20==0)
{
fdb_sync(s_hDbStat);
}
}
void CPageStat::Init()
{
if(s_hDbStat!=INVALID_FDBHANDLE)
return;
CString strHost;
strHost.Format("%d",g_nSid);
s_hDbStat = fdb_openrw(ST_PAGESTAT,strHost);
}
// Closes the page-stat database and marks the shared handle as invalid.
// A call made while no database is open does nothing.
void CPageStat::Fini()
{
    if(s_hDbStat!=INVALID_FDBHANDLE)
    {
        fdb_close(s_hDbStat);
        s_hDbStat = INVALID_FDBHANDLE;
    }
}
//////////////////////////////////////////////////////////////////////////
bool CPageStat::Updater()
{
if(s_hDbStat==INVALID_FDBHANDLE)
return false;
FITHANDLE key,value;
CPageStat objPage;
key=fdb_getitem();
value=fdb_getitem();
FDBHANDLE cursor = fdb_tfirst(s_hDbStat,key,value);
if(cursor!=INVALID_FDBHANDLE)
{
do
{
Build(objPage,key,value,false);
if(!CBloomFilter::Instance()->find(objPage.m_strUrl))
CPriorizer::Push(objPage);
fdb_resetitem(key);
fdb_resetitem(value);
} while(fdb_tnext(s_hDbStat,cursor,key,value));
fdb_tclose(s_hDbStat,cursor);
}
fdb_putitem(key);
fdb_putitem(value);
return true;
}
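// ---- Code-viewer page residue (not part of pagestat.cpp) ----
// Keyboard shortcuts:
//   Copy code          Ctrl + C
//   Search code        Ctrl + F
//   Full-screen mode   F11
//   Switch theme       Ctrl + Shift + D
//   Show shortcuts     ?
//   Increase font size Ctrl + =
//   Decrease font size Ctrl + -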