📄 htmlparse.h
字号:
/* This is My HtmlParser's first protype. * author: Feng Zeyue */#include <iostream>#include <string>#include <stdlib.h>enum tagtype { other = 0, title = 1, anchor = 2, titleend = 3, anchorend = 4, script = 5, fileend = 6, style = 7, styleend = 8, comment = 9};int parseposition = 0;int endflag;int bufparseflag = 4;string text;string tag;using namespace std;//class definitionclass HtmlParser {public: HtmlParser(); HtmlParser(string input); ~HtmlParser(); string GetAbusoluteURL(string input); bool SetBaseURL(string URL); bool Parse(); tagtype ParseTag(); bool ParseTitle(); bool ParseA(); bool ParseText(); bool ParseScript(); bool ParseStyle(); string GetURL(); string GetBaseURL();private: string baseURL; string URL;};//change the upper string to lowerbool MakeLower(string &s){ int i = 0; while(s[i] != '\0') { if((s[i] >= 65) && (s[i] <= 90)){ s[i] += 32; } i++; } return true;}//check whether arrive at the end of the bufferbool IsBufEnd(int &i) { if (i == 1023) { if(endflag == 1) { return false; } else{ bufparseflag = (bufparseflag + 1) % 5; endflag = readbuf(); } // cout<<buf[flag]; } i = (i + 1) % 1024; return true;}//Remove the target attribute from <a href..>string RemoveTarget(string &url){ int i = 0; string urlresult(url); while(url[i] != '\0') { if((url[i] == 't') && (url[i +1] =='a') && (url[i + 2] == 'r') && (url[i + 3] == 'g') && (url[i + 4] == 'e') && (url[i + 5] == 't')){ int j = i + 6; while(url[j] == ' ') { j ++; } if(url[j] == '=') { urlresult.clear(); j = 0; while(j < i) { urlresult += url[j]; j++; } return urlresult; } } i++; } return urlresult;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -