📄 basehtmlparser.h
字号:
/*
file: basehtmlparser.h
desc: simple HTML parser that reports every tag, together with its attributes and text, through virtual callbacks
author: chen hua
create: 2005-03-16
*/
#ifndef _BaseHtmlParser_H_
#define _BaseHtmlParser_H_
#include <vector>
#include <string>
using namespace std;
/// Base class of a simple, callback-driven HTML parser.
///
/// Derived classes override the On* hooks to receive tags, text and
/// comments. The constructor, destructor, both Parse() overloads and
/// Relativity2AbsoluteURL() are only declared here; their definitions live
/// outside this header.
class CBaseHtmlParser
{
public:
    /// A string described by a start pointer plus a length.
    /// The struct declares no destructor, so it does not manage the memory
    /// that pbData points at.
    struct SZ_STRING
    {
        const char* pbData;  ///< start of the character data
        size_t      cbData;  ///< size of the data (presumably in bytes -- confirm)
    };

    CBaseHtmlParser();
    virtual ~CBaseHtmlParser();

    /// Initialisation hook; the base version is empty.
    virtual void Initialize() {}

    /// Tear-down hook; the base version is empty.
    virtual void Destroy() {}

    /// Parses an HTML page given as std::string values.
    /// @param URL      address of the page
    /// @param Content  text of the page
    /// @return presumably true on success -- confirm against the definition
    virtual bool Parse(const std::string& URL, const std::string& Content);

    /// Parses an HTML page given as pointer/size strings.
    /// @param strUrl      address of the page
    /// @param strContent  text of the page
    /// @return presumably true on success -- confirm against the definition
    virtual bool Parse(const SZ_STRING& strUrl, const SZ_STRING& strContent);

    /// Utility for turning a URL into an absolute one based on the current
    /// page (per the original author's note). URL is a non-const reference,
    /// so the result is presumably written back into it.
    void Relativity2AbsoluteURL(std::string& URL);

    /// Hook for an opening tag. Author's example: for '<a href=..' the tag
    /// name is 'a' and Attribs contains 'href'. The base version is empty.
    /// @param strTagName  name of the tag
    /// @param Attribs     pairs of SZ_STRING (presumably name then value).
    ///                    Taken by value, so every call copies the vector;
    ///                    the signature is kept as-is because derived classes
    ///                    override it.
    virtual void OnStartTag(const SZ_STRING& strTagName,
                            std::vector< std::pair<SZ_STRING, SZ_STRING> > Attribs) {}

    /// Hook for a closing tag. Author's example: for '</a>' the tag name is
    /// 'a'. The base version is empty.
    virtual void OnEndTag(const SZ_STRING& strTagName) {}

    /// Hook for text found between tags. Author's example: for '<>hello<>'
    /// strData is 'hello'. The base version is empty.
    virtual void OnData(const SZ_STRING& strData) {}

    /// Hook for scripts and comments. Author's examples: '<!-- .../-->' and
    /// '<script ..> </script>'. The base version is empty.
    virtual void OnComment(const SZ_STRING& strComment) {}

private:
    // Fixed 1024-char buffers. Judging by the names they hold the base URL
    // and base domain of the page being parsed -- confirm in the .cpp file.
    char m_szBaseURL[1024];
    char m_szBaseDomain[1024];
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -