📄 htmldocument.h
字号:
#ifndef _HTMLDOCUMENT_H_
#define _HTMLDOCUMENT_H_

#include <algorithm>
#include <cctype>
#include <fstream>
#include <ostream>
#include <sstream>
#include <string>

/**
 * @brief Names an HTML file and renders its contents as lowercase words.
 *
 * The object stores only a file name. Streaming it with operator<< reads
 * the file, removes markup, turns punctuation, digits and newlines into
 * spaces, lowercases the rest and writes the result to the stream.
 */
class HTMLDocument {
private:
    std::string filename;

    /**
     * @brief Replaces every occurrence of @p from in @p s with @p to, in place.
     *
     * The scan resumes after the inserted text, so a replacement is never
     * re-examined.
     *
     * @param s     String to modify.
     * @param from  Text to look for; an empty value leaves @p s untouched.
     * @param to    Replacement text.
     * @return      Reference to @p s.
     */
    static std::string& replace_all(std::string& s, const std::string& from, const std::string& to) {
        // An empty needle matches at every position and would never terminate.
        if (from.empty()) {
            return s;
        }
        std::string::size_type pos = 0;
        while ((pos = s.find(from, pos)) != std::string::npos) {
            s.replace(pos, from.size(), to);
            pos += to.size();
        }
        return s;
    }

    /**
     * @brief Replaces each "<...>" span in @p s with a single space, in place.
     *
     * A span runs from a '<' to the first '>' that follows it. A '<' with no
     * later '>' ends the scan; that '<' and everything after it are kept.
     *
     * @param s  String to modify.
     * @return   Reference to @p s.
     */
    static std::string& blank_out_tags(std::string& s) {
        std::string kept;
        kept.reserve(s.size());

        std::string::size_type cursor = 0;
        for (;;) {
            const std::string::size_type open = s.find('<', cursor);
            const std::string::size_type close =
                (open == std::string::npos) ? std::string::npos : s.find('>', open + 1);
            if (close == std::string::npos) {
                // No further complete tag: keep the remainder verbatim and stop.
                kept.append(s, cursor, std::string::npos);
                break;
            }
            kept.append(s, cursor, open - cursor);
            kept += ' ';
            cursor = close + 1;
        }

        s.swap(kept);
        return s;
    }

    /**
     * @brief Lowercases one character.
     *
     * The value is passed to std::tolower as unsigned char, because a
     * negative char value is not a valid argument for that function.
     */
    static char to_lower_char(char c) {
        return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }

    /**
     * @brief Reduces HTML text to lowercase words separated by spaces, in place.
     *
     * Steps, in order: remove tags, blank out four HTML entities, blank out
     * punctuation / symbols / digits / newlines, lowercase everything.
     *
     * @param s  HTML text to clean.
     * @return   Reference to @p s.
     */
    static std::string& strip_tags(std::string& s) {
        blank_out_tags(s);

        // HTML special chars. These must go before the separator pass below,
        // otherwise '&' and ';' are blanked and the entity name ("nbsp", "lt",
        // ...) is left behind as a word.
        // NOTE(review): the copy of this header under review showed these four
        // literals as "<", ">", "&" and " "; they are assumed to be entities
        // that a web viewer decoded -- confirm against the upstream file.
        replace_all(s, "&lt;", " ");
        replace_all(s, "&gt;", " ");
        replace_all(s, "&amp;", " ");
        replace_all(s, "&nbsp;", " ");

        // Punctuation, symbols, digits and newline. Each one becomes a single
        // space, so one pass over the string is enough.
        static const char separators[] =
            ",.;:'_~`-+=!?[]{})(<>/\\\"\n@#$%^&*|0123456789";
        std::string::size_type pos = 0;
        while ((pos = s.find_first_of(separators, pos)) != std::string::npos) {
            s[pos] = ' ';
            ++pos;
        }

        // to lowercase
        std::transform(s.begin(), s.end(), s.begin(), to_lower_char);
        return s;
    }

public:
    /** @brief Creates a document with an empty file name. */
    HTMLDocument() {}

    /**
     * @brief Creates a document that refers to @p filename_.
     *
     * The file is not opened here; it is read each time the object is
     * streamed. Left non-explicit so existing implicit conversions from
     * std::string keep compiling.
     */
    HTMLDocument(const std::string& filename_) : filename(filename_) {}

    /** @brief Returns a mutable reference to the stored file name. */
    std::string& get_filename() { return filename; }

    /** @brief Returns a read-only reference to the stored file name. */
    const std::string& get_filename() const { return filename; }

    /**
     * @brief Reads the document's file and writes its cleaned text to @p os.
     *
     * The whole file is read into memory and passed through strip_tags().
     * If the file cannot be opened nothing is read, so an empty string is
     * written.
     *
     * @param os  Destination stream.
     * @param d   Document whose file is read.
     * @return    @p os.
     */
    friend std::ostream& operator<<(std::ostream& os, const HTMLDocument& d) {
        std::ifstream in(d.filename.c_str());
        std::ostringstream buffer;
        buffer << in.rdbuf();
        std::string content = buffer.str();
        os << HTMLDocument::strip_tags(content);
        return os;
    }
};

#endif /* _HTMLDOCUMENT_H_ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -