// File: template.h
// (code-viewer residue removed here: font-size control label)
//sunwangme@hotmail.com
#pragma once
#include <atlstr.h>
#include <vector>
// Default template file name (a Lua script, judging by the extension).
// NOTE(review): where this default is consumed is not visible in this header.
#define DEFAULT_TEMPLATE_FILE_EX _T("pagecrawler.lua")
// Default User-Agent string (identifies as MSIE 6.0 on Windows NT 5.1).
// NOTE(review): presumably used when no user agent is configured - confirm in the .cpp.
#define DEFAULT_USERAGENT _T("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)")
/// Tag stored in CSeedItem::nType to tell which kind of seed an entry holds.
enum SEED_TYPE
{
    ST_PATTERN = 1, ///< pattern seed; CTemplate::SetSeedPattern stores this tag
    ST_PAGE    = 2  ///< page seed; CTemplate::SetSeedPage stores this tag
};
/// A seed given as a pattern string plus an integer range.
///
/// Plain aggregate: the two ints are NOT initialized by default, so code that
/// creates one must assign nStart and nStop itself.
struct CSeedPattern
{
    CString strPattern; ///< pattern text
    int     nStart;     ///< first value of the range (filled from SetSeedPattern's nStart)
    int     nStop;      ///< last value of the range (filled from SetSeedPattern's nEnd)
                        ///< NOTE(review): whether nStop is inclusive is not visible here
};
/// A seed given as an explicit list of page strings.
struct CSeedPage
{
    std::vector<CString> arrPage; ///< the pages; CTemplate::SetSeedPage appends one entry
};
/// One seed entry: a type tag followed by storage for both seed kinds.
///
/// Both payload members are always present; nType says which one the producer
/// filled in (CTemplate::SetSeedPattern fills objPattern, SetSeedPage fills
/// objPage).
struct CSeedItem
{
    SEED_TYPE    nType;      ///< ST_PATTERN or ST_PAGE
    CSeedPattern objPattern; ///< payload written for ST_PATTERN entries
    CSeedPage    objPage;    ///< payload written for ST_PAGE entries
};
/// Kind of a filter rule, stored in CFilterItem::nType.
/// Values are consecutive starting at 1.
enum FILTER_TYPE
{
    FT_SKIP    = 1, ///< skip
    FT_PARSE   = 2, ///< parse
    FT_STORE   = 3, ///< store
    FT_REPLACE = 4  ///< replace
};
/// Task flags. Each enumerator is a distinct single bit, so values can be
/// OR-ed together into one mask.
/// NOTE(review): presumably the DWORD returned by CTemplate::GetTaskTypeX is a
/// combination of these - confirm against the implementation.
enum TASK_TYPE
{
    TT_SKIP    = 0x01,
    TT_PARSE   = 0x02,
    TT_STORE   = 0x04,
    TT_REPLACE = 0x08
};
/// One filter rule: a filter kind paired with a pattern string.
/// Entries are created by CTemplate::SetFilter.
struct CFilterItem
{
    FILTER_TYPE nType;      ///< which kind of filter this rule is
    CString     strPattern; ///< pattern text of the rule
                            ///< NOTE(review): pattern syntax is not visible in this header
};
/// One "finger" rule: a URL pattern paired with a finger pattern.
/// Entries are created by CTemplate::AddFingerFilter.
struct CFingerItem
{
    CString strURLPattern;    ///< pattern text for the URL (first argument of AddFingerFilter)
    CString strFingerPattern; ///< pattern text for the finger (second argument of AddFingerFilter)
};
/// Holder for the crawl-template settings: seeds, filters, accepted content
/// types, finger rules and a handful of scalar options.
///
/// The constructors and assignment operator are protected and the class keeps
/// a static CTemplate pointer (s_pTemplate), so instances are expected to be
/// obtained through Instance() and disposed of with Release().
/// NOTE(review): the exact lifetime rules live in the .cpp - confirm there.
class CTemplate
{
public:
    /// Accessor for the shared instance (see s_pTemplate). Defined out of line.
    static CTemplate* Instance();
    virtual ~CTemplate();
    /// Counterpart of Instance(). Defined out of line.
    static void Release();

protected:
    CTemplate();
    // Copy operations are not publicly reachable.
    CTemplate(const CTemplate&);
    CTemplate& operator=(const CTemplate&);

public:
    /// Sets the path used by the template (stored in m_strPath, presumably -
    /// the definition is out of line).
    void SetPath(const CString& strPath);
    /// Parses the template. Defined out of line.
    void Parse();

    // Entry points used by CrawlerMgr
public:
    int GetSid();
    int GetThread();
    int GetConnect();
    CString GetUserAgent();
    CString GetCookie();
    int GetCTimeOut();
    int GetUtf8();
    int GetNULLInBody();
    /// Returns the value last stored by SetLog().
    int GetLog() { return m_nLog; }
    const std::vector<CSeedItem>& GetSeedArray();
    const std::vector<CFilterItem>& GetFilterArray();
    const std::vector<CString>& GetContentTypeArray();
    const std::vector<CFingerItem>& GetContentFingerArray();

    // Entry points used by CTemplateEx
public:
    void SetSid(int nSid) { m_nSid = nSid; }
    void SetThread(int nThread) { m_nThread = nThread; }
    void SetCTimeOut(int nCTimeOut) { m_nCTimeOut = nCTimeOut; }
    void SetConnect(int nConnect) { m_nConnect = nConnect; }
    void SetUtf8(int nUtf8) { m_nUtf8 = nUtf8; }
    void SetLog(int nLog) { m_nLog = nLog; }
    void SetUserAgent(LPCSTR lpszUserAgent) { m_strUserAgent = lpszUserAgent; }
    void SetCookie(LPCSTR lpszCookie) { m_Cookie = lpszCookie; }
    void SetNULLInBody(int nNULL) { m_nNULLInBody = nNULL; }

    /// Appends a page seed (type ST_PAGE) holding the single page lpszPage.
    /// Despite the "Set" name this adds an entry; existing seeds are kept.
    void SetSeedPage(LPCSTR lpszPage)
    {
        CSeedItem objItem;
        objItem.nType = ST_PAGE;
        // The pattern part is unused for page seeds, but CSeedPattern has no
        // constructor: zero the ints so no indeterminate values get copied
        // into the seed array.
        objItem.objPattern.nStart = 0;
        objItem.objPattern.nStop = 0;
        objItem.objPage.arrPage.push_back(lpszPage);
        m_arrSeed.push_back(objItem);
    }

    /// Appends a pattern seed (type ST_PATTERN) with the given pattern text
    /// and range; nEnd is stored as CSeedPattern::nStop.
    /// NOTE(review): this setter takes LPCTSTR while its siblings take LPCSTR;
    /// the two differ in a Unicode build - confirm that is intended.
    void SetSeedPattern(LPCTSTR lpszPattern, int nStart, int nEnd)
    {
        CSeedItem objItem;
        objItem.nType = ST_PATTERN;
        objItem.objPattern.strPattern = lpszPattern;
        objItem.objPattern.nStart = nStart;
        objItem.objPattern.nStop = nEnd;
        m_arrSeed.push_back(objItem);
    }

    /// Appends one entry to the content-type list.
    /// (The misspelled name is part of the public interface and is kept.)
    void SetConnentType(LPCSTR lpszContentType)
    {
        m_arrContentType.push_back(lpszContentType);
    }

    /// Appends a filter rule of kind nType with pattern lpszFilter.
    void SetFilter(LPCSTR lpszFilter, FILTER_TYPE nType)
    {
        CFilterItem objItem;
        objItem.nType = nType;
        objItem.strPattern = lpszFilter;
        m_arrFilter.push_back(objItem);
    }

    /// Appends a finger rule: lpszFilter becomes the URL pattern and
    /// lpszFinger the finger pattern.
    void AddFingerFilter(LPCSTR lpszFilter, LPCSTR lpszFinger)
    {
        CFingerItem objItem;
        objItem.strURLPattern = lpszFilter;
        objItem.strFingerPattern = lpszFinger;
        m_arrFingerFilter.push_back(objItem);
    }

    // Helper functions
public:
    // NOTE(review): the returned DWORD is presumably a mask of TASK_TYPE
    // values - confirm against the implementation.
    static DWORD GetTaskTypeX(const CString& strUrl);
    static DWORD GetTaskTypeX(LPCSTR lpszUrl, int nLength);
    static CString GetContentTypeX();

private:
    std::vector<CSeedItem> m_arrSeed;            // filled by SetSeedPage / SetSeedPattern
    std::vector<CFilterItem> m_arrFilter;        // filled by SetFilter
    std::vector<CString> m_arrContentType;       // filled by SetConnentType
    std::vector<CFingerItem> m_arrFingerFilter;  // filled by AddFingerFilter
    int m_nSid;
    int m_nThread;
    int m_nConnect;
    int m_nCTimeOut;
    int m_nUtf8;
    int m_nLog;
    int m_nNULLInBody;
    CString m_strUserAgent;
    CString m_Cookie;

private:
    CString m_strPath;
    static CTemplate* s_pTemplate;
};
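// ---------------------------------------------------------------------------
// Residue from the web code viewer this file was copied from (not part of
// template.h) - keyboard shortcut help:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -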