📄 htmlparse.h

📁 html文件编码转换与解析

💻 H

字号:

/* This is My HtmlParser's first protype. * author: Feng Zeyue */#include <iostream>#include <string>#include <stdlib.h>enum tagtype {	other = 0,	title = 1,	anchor = 2,	titleend = 3,	anchorend = 4,	script = 5,	fileend = 6,	style = 7,	styleend = 8,	comment = 9};int parseposition = 0;int endflag;int bufparseflag = 4;string text;string tag;using namespace std;//class definitionclass HtmlParser {public:	HtmlParser();	HtmlParser(string input);	~HtmlParser();	string GetAbusoluteURL(string input);	bool SetBaseURL(string URL);	bool Parse();	tagtype ParseTag();	bool ParseTitle();	bool ParseA();	bool ParseText();	bool ParseScript();	bool ParseStyle();	string GetURL();	string GetBaseURL();private:	string baseURL;	string URL;};//change the upper string to lowerbool MakeLower(string &s){	int i = 0;	while(s[i] != '\0')	{		if((s[i] >= 65) && (s[i] <= 90)){			s[i] += 32;		}		i++;	}	return true;}//check whether arrive at the end of the bufferbool IsBufEnd(int &i) {	if (i == 1023) {		if(endflag == 1)		{			return false;		}		else{			bufparseflag = (bufparseflag + 1) % 5;			endflag = readbuf();		}		//		cout<<buf[flag];	}	i = (i + 1) % 1024;	return true;}//Remove the target attribute from <a href..>string RemoveTarget(string &url){	int i = 0;	string urlresult(url);	while(url[i] != '\0')	{		if((url[i] == 't') && (url[i +1] =='a') && (url[i + 2] == 'r') && (url[i + 3] == 'g') && (url[i + 4] == 'e') && (url[i + 5] == 't')){			int j = i + 6;			while(url[j] == ' ')			{				j ++;			}			if(url[j] == '=')			{				urlresult.clear();				j = 0;				while(j < i)				{					urlresult += url[j];					j++;				}				return urlresult;			}		}		i++;	}	return urlresult;}

⌨️ 快捷键说明

复制代码 Ctrl + C

搜索代码 Ctrl + F

全屏模式 F11

切换主题 Ctrl + Shift + D

显示快捷键 ?

增大字号 Ctrl + =

减小字号 Ctrl + -