📄 parserdom.cc
字号:
#include "ParserDom.h"
#include "wincstring.h"

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

//#define DEBUG
#include "debug.h"

#define TAG_NAME_MAX 10

using namespace std;
using namespace htmlcxx;
using namespace HTML;
using namespace kp;

/**
 * Parses @p html and returns the resulting DOM tree.
 *
 * Runs parse() on the input and then hands back getTree() by const
 * reference, so the returned tree is only meaningful while this parser
 * object is alive and has not been asked to parse something else.
 *
 * @param html document text to parse
 * @return the tree reported by getTree() after parsing
 */
const tree<HTML::Node>& ParserDom::parseTree(const std::string &html)
{
    this->parse(html);
    return this->getTree();
}

/**
 * Resets the tree and installs an artificial root node.
 *
 * The root is a zero-offset, zero-length node flagged as a tag (and not a
 * comment); it becomes the current insertion point for everything found
 * afterwards.
 */
void ParserDom::beginParsing()
{
    mHtmlTree.clear();
    tree<HTML::Node>::iterator top = mHtmlTree.begin();

    HTML::Node lambda_node;
    lambda_node.offset(0);
    lambda_node.length(0);
    lambda_node.isTag(true);
    lambda_node.isComment(false);

    mCurrentState = mHtmlTree.insert(top, lambda_node);
}

/**
 * Finalises the tree by setting the root node's length to mCurrentOffset.
 */
void ParserDom::endParsing()
{
    tree<HTML::Node>::iterator top = mHtmlTree.begin();
    top->length(mCurrentOffset);
}

/**
 * Records a comment node as a child of the current node.
 *
 * @param node the comment node to append
 */
void ParserDom::foundComment(Node node)
{
    // Add child content node, but do not update current state
    mHtmlTree.append_child(mCurrentState, node);
}

/**
 * Records a text node as a child of the current node.
 *
 * @param node the text node to append
 */
void ParserDom::foundText(Node node)
{
    // Add child content node, but do not update current state
    mHtmlTree.append_child(mCurrentState, node);
}

/**
 * Handles an opening or closing tag.
 *
 * An opening tag is appended below the current node and becomes the new
 * current node. A closing tag is matched (case-insensitively) against the
 * current node and then its ancestors, stopping before the root:
 *  - on a match, the matched node's length is stretched to the end of the
 *    closing tag, its closing text is stored, the current node moves to the
 *    matched node's parent, and every node skipped on the way up is
 *    flattened;
 *  - without a match, the closing tag is stored as a comment child of the
 *    current node.
 *
 * @param node  the tag that was found
 * @param isEnd true when @p node is a closing tag
 */
void ParserDom::foundTag(Node node, bool isEnd)
{
    if (!isEnd)
    {
        // Append to current tree node and descend into it
        tree<HTML::Node>::iterator next_state;
        next_state = mHtmlTree.append_child(mCurrentState, node);
        mCurrentState = next_state;
    }
    else
    {
        // Look if there is a pending open tag with that same name upwards.
        // If the mCurrentState tag isn't the matching tag, maybe one of its
        // parents matches.
        vector< tree<HTML::Node>::iterator > path;
        tree<HTML::Node>::iterator i = mCurrentState;
        bool found_open = false;

        while (i != mHtmlTree.begin())
        {
#ifdef DEBUG
            cerr << "comparing " << node.tagName() << " with " << i->tagName()<<endl<<":";
            if (!i->tagName().length()) cerr << "Tag with no name at" << i->offset()<<";"<<i->offset()+i->length();
#endif
            assert(i->isTag());
            assert(i->tagName().length());

            // Bind the names to references instead of keeping raw c_str()
            // pointers: this stays valid whether tagName() returns a
            // reference or a temporary (lifetime is extended by the binding).
            const std::string &open_name = i->tagName();
            const std::string &close_name = node.tagName();
            const bool equal = (strcasecmp(open_name.c_str(), close_name.c_str()) == 0);

            if (equal)
            {
                DEBUGP("Found matching tag %s\n", i->tagName().c_str());
                // Closing tag closes this tag.
                // Set length to full range between the opening tag and
                // closing tag
                i->length(node.offset() + node.length() - i->offset());
                i->closingText(node.text());

                mCurrentState = mHtmlTree.parent(i);
                found_open = true;
                break;
            }
            else
            {
                // Still open but not the one being closed; remember it
                path.push_back(i);
            }

            i = mHtmlTree.parent(i);
        }

        if (found_open)
        {
            // If match was upper in the tree, we need to invalidate child
            // nodes that were waiting for a close.
            // NOTE(review): flatten() presumably turns each node's children
            // into its following siblings (tree.hh semantics) - confirm.
            typedef vector< tree<HTML::Node>::iterator >::size_type path_index;
            for (path_index j = 0; j < path.size(); ++j)
            {
//              path[j]->length(node.offset() - path[j]->offset());
                mHtmlTree.flatten(path[j]);
            }
        }
        else
        {
            DEBUGP("Unmatched tag %s\n", node.text().c_str());

            // Treat as comment
            node.isTag(false);
            node.isComment(true);
            mHtmlTree.append_child(mCurrentState, node);
        }
    }
}

/**
 * Dumps a tree to @p stream, one node per line, in pre-order.
 *
 * Each line is indented by one space per level below the first node, then
 * shows the node's running index, its [offset;offset+length) range and its
 * string conversion. The dump is framed by "-----" lines.
 *
 * @param stream destination stream
 * @param tr     tree to print
 * @return @p stream, to allow chaining
 */
ostream &HTML::operator<<(ostream &stream, const tree<HTML::Node> &tr)
{
    tree<HTML::Node>::pre_order_iterator it = tr.begin();
    tree<HTML::Node>::pre_order_iterator end = tr.end();

    int rootdepth = tr.depth(it);
    stream << "-----" << '\n';

    unsigned int n = 0;
    while (it != end)
    {
        int cur_depth = tr.depth(it);
        for (int i = 0; i < cur_depth - rootdepth; ++i) stream << " ";

        stream << n << "@";
        stream << "[" << it->offset() << ";";
        stream << it->offset() + it->length() << ") ";
        stream << static_cast<string>(*it) << '\n';

        ++it;
        ++n;
    }

    // Single flush at the end instead of one per printed node
    stream << "-----" << endl;
    return stream;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -