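// NOTE: the following lines are download-site page text that was captured
// together with this header; they are not part of the original source.
// Welcome to the Chongchong download site! | Resource downloads | Resource albums | About us
// Chongchong download site
//
// basehtmlparser.h
//
// htmlcxx - a simple HTML parser
// H
// Font size: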
/*
      file: basehtmlparser.h
      desc: simple HTML parser that reports every tag, its attributes, and the text between tags
      author: chen hua
      created: 2005-03-16
*/
#ifndef _BaseHtmlParser_H_
#define _BaseHtmlParser_H_
#include <vector>
#include <string>
using namespace std; 


/**
 * Base class of a simple, callback-driven HTML parser.
 *
 * Derived classes override the On* hooks below; the default bodies in this
 * header are empty. The constructor, destructor, both Parse overloads and
 * Relativity2AbsoluteURL are only declared here -- their definitions are not
 * part of this header.
 */
class CBaseHtmlParser
{
public:
    /**
     * A string described by a pointer and a byte count.
     *
     * The struct has no destructor, so it does not release pbData itself.
     * NOTE(review): presumably pbData need not be NUL-terminated because the
     * length is carried in cbData -- confirm against the implementation.
     */
    struct SZ_STRING
    {
        const char* pbData;  ///< first character of the string
        size_t      cbData;  ///< number of characters at pbData
    };

public:
    CBaseHtmlParser();
    virtual ~CBaseHtmlParser();

    /// Set-up hook; the base version does nothing.
    virtual void Initialize() {}

    /// Tear-down hook; the base version does nothing.
    virtual void Destroy() {}

    /// Parses an HTML page given its URL and content as std::string objects.
    virtual bool Parse(const std::string& URL, const std::string& Content);

    /// Parses an HTML page given its URL and content as pointer/size strings.
    virtual bool Parse(const SZ_STRING& strUrl, const SZ_STRING& strContent);

    /**
     * Utility for obtaining an absolute URL based on the current page.
     * NOTE(review): URL is a non-const reference, so the result is presumably
     * written back into it -- confirm against the implementation.
     */
    void Relativity2AbsoluteURL(std::string& URL);

    /**
     * Hook for an opening tag. For '<a href=..' strTagName is 'a' and
     * Attribs contains 'href'. Attribs is received by value.
     */
    virtual void OnStartTag(const SZ_STRING& strTagName,
                            std::vector< std::pair<SZ_STRING, SZ_STRING> > Attribs) {}

    /// Hook for a closing tag. For '</a>' strTagName is 'a'.
    virtual void OnEndTag(const SZ_STRING& strTagName) {}

    /// Hook for text between tags. For '<>hello<>' strData is 'hello'.
    virtual void OnData(const SZ_STRING& strData) {}

    /// Hook for a script or comment, such as '<!-- .../-->' or
    /// '<script ..> </script>'.
    virtual void OnComment(const SZ_STRING& strComment) {}

private:
    // Fixed-size character buffers; judging by their names they hold the
    // base URL and base domain of the current page (usage is not visible in
    // this header).
    char m_szBaseURL[1024];
    char m_szBaseDomain[1024];
};

#endif 

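// NOTE: the following lines are download-site page text that was captured
// together with this header; they are not part of the original source.
// Keyboard shortcuts
//
// Copy code: Ctrl + C
// Search code: Ctrl + F
// Full-screen mode: F11
// Switch theme: Ctrl + Shift + D
// Show shortcuts: ?
// Increase font size: Ctrl + =
// Decrease font size: Ctrl + -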