⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 template.h

📁 概述:数据的纵向收集
💻 H
字号:
//sunwangme@hotmail.com
#pragma once

#include <atlstr.h>
#include <vector>

// File name of the default template script (a .lua file, per its extension).
// NOTE(review): presumably the script loaded by CTemplateEx; the use site is not in this header.
#define DEFAULT_TEMPLATE_FILE_EX	_T("pagecrawler.lua")
// Default User-Agent string; it identifies the client as MSIE 6.0 on Windows NT 5.1.
// Where it is applied is not visible in this header.
#define DEFAULT_USERAGENT _T("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)")

// Discriminator stored in CSeedItem::nType; it records which kind of seed
// an item carries. Numbering starts at 1, so no enumerator has the value 0.
enum SEED_TYPE
{
	ST_PATTERN = 1,	// seed described by a pattern string plus a start/stop range
	ST_PAGE    = 2	// seed described by an explicit page entry
};

// Seed expressed as a pattern string together with an integer range.
// The struct has no constructor, so nStart and nStop are NOT initialized
// automatically; whoever creates a CSeedPattern must assign them.
// NOTE(review): presumably the pattern is expanded once per value in
// [nStart, nStop]; the expansion code is not in this header, so confirm
// the format and whether nStop is inclusive.
struct CSeedPattern
{
	CString strPattern;	// pattern text
	int nStart;			// first value of the range
	int nStop;			// last value of the range (inclusiveness not shown here)
};

// Seed expressed as an explicit list of page strings.
// CTemplate::SetSeedPage stores exactly one entry per CSeedPage it creates.
struct CSeedPage
{
	std::vector<CString> arrPage;	// page entries (presumably URLs; verify against the crawler)
};

// One seed entry: a type tag plus storage for both seed kinds.
// CTemplate's setters fill only the member that matches nType
// (objPattern for ST_PATTERN, objPage for ST_PAGE).
struct CSeedItem 
{
	SEED_TYPE nType;			// which of the two members below is meaningful
	CSeedPattern objPattern;	// filled in when nType == ST_PATTERN
	CSeedPage objPage;	// filled in when nType == ST_PAGE
};

// Kind of action associated with a CFilterItem pattern.
// Values are sequential and start at 1 (contrast with the bit-valued TASK_TYPE).
enum FILTER_TYPE
{
	FT_SKIP    = 1,	// skip
	FT_PARSE   = 2,	// parse
	FT_STORE   = 3,	// store
	FT_REPLACE = 4	// replace
};

// Task kinds encoded as distinct single-bit values (1, 2, 4, 8).
// NOTE(review): the bit layout suggests these are OR-ed together into the
// DWORD returned by CTemplate::GetTaskTypeX; confirm against its definition.
enum TASK_TYPE
{
	TT_SKIP    = 1 << 0,
	TT_PARSE   = 1 << 1,
	TT_STORE   = 1 << 2,
	TT_REPLACE = 1 << 3
};

// One filter rule: a pattern string paired with the kind of action to apply.
// How the pattern is matched (regex, wildcard, substring) is not visible here.
struct CFilterItem
{
	FILTER_TYPE nType;	// action kind for this rule
	CString strPattern;	// pattern text
};

class CTemplate
{
public:
	static CTemplate* Instance();
	virtual ~CTemplate();
	static void Release();

protected:
	CTemplate();
	CTemplate(const CTemplate&);
	CTemplate& operator=(const CTemplate&);

public:
	void SetPath(const CString& strPath);
	void Parse();

//CrawlerMgr的入口
public:
	int	GetSid();
	int GetThread();
	int GetConnect();
	CString GetUserAgent();
	int GetCTimeOut();
	int GetUtf8();
	CString GetCookie();
	const std::vector<CSeedItem>& GetSeedArray();
	const std::vector<CFilterItem>& GetFilterArray();
	const std::vector<CString>& GetContentTypeArray();

//CTemplateEx的入口
public:
	void SetSid(int nSid){m_nSid=nSid;}
	void SetThread(int nThread){m_nThread=nThread;}
	void SetCTimeOut(int nCTimeOut){m_nCTimeOut=nCTimeOut;}
	void SetConnect(int nConnect){m_nConnect=nConnect;}
	void SetUtf8(int nUtf8){m_nUtf8=nUtf8;}
	void SetUserAgent(LPCSTR lpszUserAgent){m_strUserAgent=lpszUserAgent;}
	void SetCookie(LPCSTR lpszCookie){m_Cookie=lpszCookie;}
	void SetSeedPage(LPCSTR lpszPage)
	{
		CSeedItem objItem;
		objItem.nType=ST_PAGE;
		objItem.objPage.arrPage.push_back(lpszPage);
		m_arrSeed.push_back(objItem);
	}
	void SetSeedPattern(LPCTSTR lpszPattern,int nStart,int nEnd)
	{
		CSeedItem objItem;
		objItem.nType=ST_PATTERN;
		objItem.objPattern.strPattern=lpszPattern;
		objItem.objPattern.nStart=nStart;
		objItem.objPattern.nStop=nEnd;
		m_arrSeed.push_back(objItem);
	}
	void SetConnentType(LPCSTR lpszContentType)
	{
		m_arrContentType.push_back(lpszContentType);
	}
	void SetFilter(LPCSTR lpszFilter,FILTER_TYPE nType)
	{
		 CFilterItem objItem;
		 objItem.nType=nType;
		 objItem.strPattern=lpszFilter;
		 m_arrFilter.push_back(objItem);
	}

//辅助函数
public:
	static DWORD GetTaskTypeX(const CString& strUrl);
	static DWORD GetTaskTypeX(LPCSTR lpszUrl,int nLength);
	static CString GetContentTypeX();

private:
	std::vector<CSeedItem> m_arrSeed;
	std::vector<CFilterItem> m_arrFilter;
	std::vector<CString> m_arrContentType;
	int m_nSid;
	int m_nThread;
	int m_nConnect;
	int m_nCTimeOut;
	int m_nUtf8;
	CString m_strUserAgent;
	CString m_Cookie;

private:
	CString m_strPath;
	static CTemplate* s_pTemplate;
};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -