📄 qtexthtmlparser.cpp
字号:
0x0161, // 0x9A 0x203A, // 0x9B 0x0153, // 0x9C 0x009D, // 0x9D direct mapping 0x017E, // 0x9E 0x0178 // 0x9F};// the displayMode value is according to the what are blocks in the piecetable, not// what the w3c defines.static const QTextHtmlElement elements[Html_NumElements]= { { "a", Html_a, QTextHtmlElement::DisplayInline }, { "address", Html_address, QTextHtmlElement::DisplayInline }, { "b", Html_b, QTextHtmlElement::DisplayInline }, { "big", Html_big, QTextHtmlElement::DisplayInline }, { "blockquote", Html_blockquote, QTextHtmlElement::DisplayBlock }, { "body", Html_body, QTextHtmlElement::DisplayBlock }, { "br", Html_br, QTextHtmlElement::DisplayInline }, { "caption", Html_caption, QTextHtmlElement::DisplayBlock }, { "center", Html_center, QTextHtmlElement::DisplayBlock }, { "cite", Html_cite, QTextHtmlElement::DisplayInline }, { "code", Html_code, QTextHtmlElement::DisplayInline }, { "dd", Html_dd, QTextHtmlElement::DisplayBlock }, { "dfn", Html_dfn, QTextHtmlElement::DisplayInline }, { "div", Html_div, QTextHtmlElement::DisplayBlock }, { "dl", Html_dl, QTextHtmlElement::DisplayBlock }, { "dt", Html_dt, QTextHtmlElement::DisplayBlock }, { "em", Html_em, QTextHtmlElement::DisplayInline }, { "font", Html_font, QTextHtmlElement::DisplayInline }, { "h1", Html_h1, QTextHtmlElement::DisplayBlock }, { "h2", Html_h2, QTextHtmlElement::DisplayBlock }, { "h3", Html_h3, QTextHtmlElement::DisplayBlock }, { "h4", Html_h4, QTextHtmlElement::DisplayBlock }, { "h5", Html_h5, QTextHtmlElement::DisplayBlock }, { "h6", Html_h6, QTextHtmlElement::DisplayBlock }, { "head", Html_head, QTextHtmlElement::DisplayNone }, { "hr", Html_hr, QTextHtmlElement::DisplayBlock }, { "html", Html_html, QTextHtmlElement::DisplayInline }, { "i", Html_i, QTextHtmlElement::DisplayInline }, { "img", Html_img, QTextHtmlElement::DisplayInline }, { "kbd", Html_kbd, QTextHtmlElement::DisplayInline }, { "li", Html_li, QTextHtmlElement::DisplayBlock }, { "link", Html_link, QTextHtmlElement::DisplayNone }, { "meta", Html_meta, QTextHtmlElement::DisplayNone }, { "nobr", Html_nobr, QTextHtmlElement::DisplayInline }, { "ol", Html_ol, QTextHtmlElement::DisplayBlock }, { "p", Html_p, QTextHtmlElement::DisplayBlock }, { "pre", Html_pre, QTextHtmlElement::DisplayBlock }, { "qt", Html_body /*deliberate mapping*/, QTextHtmlElement::DisplayBlock }, { "s", Html_s, QTextHtmlElement::DisplayInline }, { "samp", Html_samp, QTextHtmlElement::DisplayInline }, { "small", Html_small, QTextHtmlElement::DisplayInline }, { "span", Html_span, QTextHtmlElement::DisplayInline }, { "strong", Html_strong, QTextHtmlElement::DisplayInline }, { "style", Html_style, QTextHtmlElement::DisplayNone }, { "sub", Html_sub, QTextHtmlElement::DisplayInline }, { "sup", Html_sup, QTextHtmlElement::DisplayInline }, { "table", Html_table, QTextHtmlElement::DisplayTable }, { "tbody", Html_tbody, QTextHtmlElement::DisplayTable }, { "td", Html_td, QTextHtmlElement::DisplayBlock }, { "tfoot", Html_tfoot, QTextHtmlElement::DisplayTable }, { "th", Html_th, QTextHtmlElement::DisplayBlock }, { "thead", Html_thead, QTextHtmlElement::DisplayTable }, { "title", Html_title, QTextHtmlElement::DisplayNone }, { "tr", Html_tr, QTextHtmlElement::DisplayTable }, { "tt", Html_tt, QTextHtmlElement::DisplayInline }, { "u", Html_u, QTextHtmlElement::DisplayInline }, { "ul", Html_ul, QTextHtmlElement::DisplayBlock }, { "var", Html_var, QTextHtmlElement::DisplayInline },};static bool operator<(const QString &str, const QTextHtmlElement &e){ return str < QLatin1String(e.name);}static bool operator<(const QTextHtmlElement &e, const QString &str){ return QLatin1String(e.name) < str;}static const QTextHtmlElement *lookupElement(const QString &element){ const QTextHtmlElement *start = &elements[0]; const QTextHtmlElement *end = &elements[Html_NumElements]; const QTextHtmlElement *e = qBinaryFind(start, end, element); if (e == end) return 0; return e;}int QTextHtmlParser::lookupElement(const QString &element){ const QTextHtmlElement *e = ::lookupElement(element); if (!e) return -1; return e->id;}// quotes newlines as "\\n"static QString quoteNewline(const QString &s){ QString n = s; n.replace(QLatin1Char('\n'), QLatin1String("\\n")); return n;}QTextHtmlParserNode::QTextHtmlParserNode() : parent(0), id(Html_unknown), cssFloat(QTextFrameFormat::InFlow), hasOwnListStyle(false), hasCssListIndent(false), isEmptyParagraph(false), isTextFrame(false), isRootFrame(false), displayMode(QTextHtmlElement::DisplayInline), hasHref(false), listStyle(QTextListFormat::ListStyleUndefined), imageWidth(-1), imageHeight(-1), tableBorder(0), tableCellRowSpan(1), tableCellColSpan(1), tableCellSpacing(2), tableCellPadding(0), borderBrush(Qt::darkGray), borderStyle(QTextFrameFormat::BorderStyle_Outset), userState(-1), cssListIndent(0), wsm(WhiteSpaceModeUndefined){ margin[QTextHtmlParser::MarginLeft] = 0; margin[QTextHtmlParser::MarginRight] = 0; margin[QTextHtmlParser::MarginTop] = 0; margin[QTextHtmlParser::MarginBottom] = 0;}void QTextHtmlParser::dumpHtml(){ for (int i = 0; i < count(); ++i) { qDebug().nospace() << qPrintable(QString(depth(i)*4, QLatin1Char(' '))) << qPrintable(at(i).tag) << ":" << quoteNewline(at(i).text); ; }}QTextHtmlParserNode *QTextHtmlParser::newNode(int parent){ QTextHtmlParserNode *lastNode = &nodes.last(); QTextHtmlParserNode *newNode = 0; bool reuseLastNode = true; if (nodes.count() == 1) { reuseLastNode = false; } else if (lastNode->tag.isEmpty()) { if (lastNode->text.isEmpty()) { reuseLastNode = true; } else { // last node is a text node (empty tag) with some text if (lastNode->text.length() == 1 && lastNode->text.at(0).isSpace()) { int lastSibling = count() - 2; while (lastSibling && at(lastSibling).parent != lastNode->parent && at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) { lastSibling = at(lastSibling).parent; } if (at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) { reuseLastNode = false; } else { reuseLastNode = true; } } else { // text node with real (non-whitespace) text -> nothing to re-use reuseLastNode = false; } } } else { // last node had a proper tag -> nothing to re-use reuseLastNode = false; } if (reuseLastNode) { newNode = lastNode; newNode->tag.clear(); newNode->text.clear(); newNode->id = Html_unknown; } else { nodes.resize(nodes.size() + 1); newNode = &nodes.last(); } newNode->parent = parent; return newNode;}void QTextHtmlParser::parse(const QString &text, const QTextDocument *_resourceProvider){ nodes.clear(); nodes.resize(1); txt = text; pos = 0; len = txt.length(); textEditMode = false; resourceProvider = _resourceProvider; parse(); //dumpHtml();}int QTextHtmlParser::depth(int i) const{ int depth = 0; while (i) { i = at(i).parent; ++depth; } return depth;}int QTextHtmlParser::margin(int i, int mar) const { int m = 0; const QTextHtmlParserNode *node; if (mar == MarginLeft || mar == MarginRight) { while (i) { node = &at(i); if (!node->isBlock() && node->id != Html_table) break; if (node->isTableCell()) break; m += node->margin[mar]; i = node->parent; } } return m;}int QTextHtmlParser::topMargin(int i) const{ if (!i) return 0; return at(i).margin[MarginTop];}int QTextHtmlParser::bottomMargin(int i) const{ if (!i) return 0; return at(i).margin[MarginBottom];}void QTextHtmlParser::eatSpace(){ while (pos < len && txt.at(pos).isSpace() && txt.at(pos) != QChar::ParagraphSeparator) pos++;}void QTextHtmlParser::parse() { QTextHtmlParserNode::WhiteSpaceMode wsm = QTextHtmlParserNode::WhiteSpaceNormal; while (pos < len) { QChar c = txt.at(pos++); if (c == QLatin1Char('<')) { parseTag(); wsm = nodes.last().wsm; } else if (c == QLatin1Char('&')) { nodes.last().text += parseEntity(); } else { nodes.last().text += c; } }}// parses a tag after "<"void QTextHtmlParser::parseTag(){ eatSpace(); // handle comments and other exclamation mark declarations if (hasPrefix(QLatin1Char('!'))) { parseExclamationTag(); if (nodes.last().wsm != QTextHtmlParserNode::WhiteSpacePre && nodes.last().wsm != QTextHtmlParserNode::WhiteSpacePreWrap && !textEditMode) eatSpace(); return; } // if close tag just close if (hasPrefix(QLatin1Char('/'))) { if (nodes.last().id == Html_style) { QCss::Parser parser(nodes.last().text); QCss::StyleSheet sheet; parser.parse(&sheet); inlineStyleSheets.append(sheet); resolveStyleSheetImports(sheet); } parseCloseTag(); return; } int p = last(); while (p && at(p).tag.size() == 0) p = at(p).parent; QTextHtmlParserNode *node = newNode(p); // parse tag name node->tag = parseWord().toLower(); const QTextHtmlElement *elem = ::lookupElement(node->tag); if (elem) { node->id = elem->id; node->displayMode = elem->displayMode; } else { node->id = Html_unknown; } node->attributes.clear(); // _need_ at least one space after the tag name, otherwise there can't be attributes if (pos < len && txt.at(pos).isSpace()) node->attributes = parseAttributes(); // resolveParent() may have to change the order in the tree and // insert intermediate nodes for buggy HTML, so re-initialize the 'node' // pointer through the return value node = resolveParent(); resolveNode(); const int nodeIndex = nodes.count() - 1; // this new node is always the last node->applyCssDeclarations(declarationsForNode(nodeIndex), resourceProvider); applyAttributes(node->attributes); // finish tag bool tagClosed = false; while (pos < len && txt.at(pos) != QLatin1Char('>')) { if (txt.at(pos) == QLatin1Char('/')) tagClosed = true; pos++; } pos++; // in a white-space preserving environment strip off a initial newline // since the element itself already generates a newline if ((node->wsm == QTextHtmlParserNode::WhiteSpacePre || node->wsm == QTextHtmlParserNode::WhiteSpacePreWrap) && node->isBlock()) { if (pos < len - 1 && txt.at(pos) == QLatin1Char('\n')) ++pos; } if (node->mayNotHaveChildren() || tagClosed) { newNode(node->parent); resolveNode(); }}// parses a tag beginning with "/"void QTextHtmlParser::parseCloseTag(){ ++pos; QString tag = parseWord().toLower().trimmed(); while (pos < len) { QChar c = txt.at(pos++); if (c == QLatin1Char('>')) break; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -