📄 qtexthtmlparser.cpp
字号:
// find corresponding open node int p = last(); if (p > 0 && at(p - 1).tag == tag && at(p - 1).mayNotHaveChildren()) p--; while (p && at(p).tag != tag) p = at(p).parent; // simply ignore the tag if we can't find // a corresponding open node, for broken // html such as <font>blah</font></font> if (!p) return; // in a white-space preserving environment strip off a trailing newline // since the closing of the opening block element will automatically result // in a new block for elements following the <pre> // ...foo\n</pre><p>blah -> foo</pre><p>blah if ((at(p).wsm == QTextHtmlParserNode::WhiteSpacePre || at(p).wsm == QTextHtmlParserNode::WhiteSpacePreWrap) && at(p).isBlock()) { if (at(last()).text.endsWith(QLatin1Char('\n'))) nodes[last()].text.chop(1); } newNode(at(p).parent); resolveNode();}// parses a tag beginning with "!"void QTextHtmlParser::parseExclamationTag(){ ++pos; if (hasPrefix(QLatin1Char('-'),1) && hasPrefix(QLatin1Char('-'),2)) { pos += 3; // eat comments int end = txt.indexOf(QLatin1String("-->"), pos); pos = (end >= 0 ? end + 3 : len); } else { // eat internal tags while (pos < len) { QChar c = txt.at(pos++); if (c == QLatin1Char('>')) break; } }}// parses an entity after "&", and returns itQString QTextHtmlParser::parseEntity(){ int recover = pos; QString entity; while (pos < len) { QChar c = txt.at(pos++); if (c.isSpace() || pos - recover > 8) { goto error; } if (c == QLatin1Char(';')) break; entity += c; } { QChar resolved = resolveEntity(entity); if (!resolved.isNull()) return QString(resolved); } if (entity.length() > 1 && entity.at(0) == QLatin1Char('#')) { entity.remove(0, 1); // removing leading # int base = 10; bool ok = false; if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity? entity.remove(0, 1); base = 16; } uint uc = entity.toUInt(&ok, base); if (ok) { if (uc >= 0x80 && uc < 0x80 + (sizeof(windowsLatin1ExtendedCharacters)/sizeof(windowsLatin1ExtendedCharacters[0]))) uc = windowsLatin1ExtendedCharacters[uc - 0x80]; QString str; if (uc > 0xffff) { // surrogate pair uc -= 0x10000; ushort high = uc/0x400 + 0xd800; ushort low = uc%0x400 + 0xdc00; str.append(QChar(high)); str.append(QChar(low)); } else { str.append(QChar(uc)); } return str; } }error: pos = recover; return QLatin1String("&");}// parses one word, possibly quoted, and returns itQString QTextHtmlParser::parseWord(){ QString word; if (hasPrefix(QLatin1Char('\"'))) { // double quotes ++pos; while (pos < len) { QChar c = txt.at(pos++); if (c == QLatin1Char('\"')) break; else if (c == QLatin1Char('&')) word += parseEntity(); else word += c; } } else if (hasPrefix(QLatin1Char('\''))) { // single quotes ++pos; while (pos < len) { QChar c = txt.at(pos++); if (c == QLatin1Char('\'')) break; else word += c; } } else { // normal text while (pos < len) { QChar c = txt.at(pos++); if (c == QLatin1Char('>') || (c == QLatin1Char('/') && hasPrefix(QLatin1Char('>'), 1)) || c == QLatin1Char('<') || c == QLatin1Char('=') || c.isSpace()) { --pos; break; } if (c == QLatin1Char('&')) word += parseEntity(); else word += c; } } return word;}// gives the new node the right parentQTextHtmlParserNode *QTextHtmlParser::resolveParent(){ QTextHtmlParserNode *node = &nodes.last(); int p = node->parent; // Excel gives us buggy HTML with just tr without surrounding table tags // or with just td tags if (node->id == Html_td) { int n = p; while (n && at(n).id != Html_tr) n = at(n).parent; if (!n) { nodes.insert(nodes.count() - 1, QTextHtmlParserNode()); nodes.insert(nodes.count() - 1, QTextHtmlParserNode()); QTextHtmlParserNode *table = &nodes[nodes.count() - 3]; table->parent = p; table->id = Html_table; table->tag = QLatin1String("table"); table->children.append(nodes.count() - 2); // add row as child QTextHtmlParserNode *row = &nodes[nodes.count() - 2]; row->parent = nodes.count() - 3; // table as parent row->id = Html_tr; row->tag = QLatin1String("tr"); p = nodes.count() - 2; node = &nodes.last(); // re-initialize pointer } } if (node->id == Html_tr) { int n = p; while (n && at(n).id != Html_table) n = at(n).parent; if (!n) { nodes.insert(nodes.count() - 1, QTextHtmlParserNode()); QTextHtmlParserNode *table = &nodes[nodes.count() - 2]; table->parent = p; table->id = Html_table; table->tag = QLatin1String("table"); p = nodes.count() - 2; node = &nodes.last(); // re-initialize pointer } } // permit invalid html by letting block elements be children // of inline elements with the exception of paragraphs: // // a new paragraph closes parent inline elements (while loop), // unless they themselves are children of a non-paragraph block // element (if statement) // // For example: // // <body><p><b>Foo<p>Bar <-- second <p> implicitly closes <b> that // belongs to the first <p>. The self-nesting // check further down prevents the second <p> // from nesting into the first one then. // so Bar is not bold. // // <body><b><p>Foo <-- Foo should be bold. // // <body><b><p>Foo<p>Bar <-- Foo and Bar should be bold. // if (node->id == Html_p) { while (p && !at(p).isBlock()) p = at(p).parent; if (!p || at(p).id != Html_p) p = node->parent; } // some elements are not self nesting if (node->id == at(p).id && node->isNotSelfNesting()) p = at(p).parent; // some elements are not allowed in certain contexts while (p && !node->allowedInContext(at(p).id) // ### make new styles aware of empty tags || at(p).mayNotHaveChildren() ) { p = at(p).parent; } node->parent = p; // makes it easier to traverse the tree, later nodes[p].children.append(nodes.count() - 1); return node;}// sets all properties on the new nodevoid QTextHtmlParser::resolveNode(){ QTextHtmlParserNode *node = &nodes.last(); const QTextHtmlParserNode *parent = &nodes.at(node->parent); node->initializeProperties(parent, this);}bool QTextHtmlParserNode::isNestedList(const QTextHtmlParser *parser) const{ if (!isListStart()) return false; int p = parent; while (p) { if (parser->at(p).isListStart()) return true; p = parser->at(p).parent; } return false;}void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent, const QTextHtmlParser *parser){ // inherit properties from parent element charFormat = parent->charFormat; if (parent->displayMode == QTextHtmlElement::DisplayNone) displayMode = QTextHtmlElement::DisplayNone; if (parent->id != Html_table || id == Html_caption) { if (parent->blockFormat.hasProperty(QTextFormat::BlockAlignment)) blockFormat.setAlignment(parent->blockFormat.alignment()); else blockFormat.clearProperty(QTextFormat::BlockAlignment); } // we don't paint per-row background colors, yet. so as an // exception inherit the background color here // we also inherit the background between inline elements if ((parent->id != Html_tr || !isTableCell()) && (displayMode != QTextHtmlElement::DisplayInline || parent->displayMode != QTextHtmlElement::DisplayInline)) { charFormat.clearProperty(QTextFormat::BackgroundBrush); } listStyle = parent->listStyle; // makes no sense to inherit that property, a named anchor is a single point // in the document, which is set by the DocumentFragment charFormat.clearProperty(QTextFormat::AnchorName); wsm = parent->wsm; // initialize remaining properties margin[QTextHtmlParser::MarginLeft] = 0; margin[QTextHtmlParser::MarginRight] = 0; margin[QTextHtmlParser::MarginTop] = 0; margin[QTextHtmlParser::MarginBottom] = 0; cssFloat = QTextFrameFormat::InFlow; // set element specific attributes switch (id) { case Html_a: charFormat.setAnchor(true); for (int i = 0; i < attributes.count(); i += 2) { const QString key = attributes.at(i); if (key.compare(QLatin1String("href"), Qt::CaseInsensitive) == 0 && !attributes.at(i + 1).isEmpty()) { hasHref = true; charFormat.setUnderlineStyle(QTextCharFormat::SingleUnderline); charFormat.setForeground(Qt::blue); } } break; case Html_em: case Html_i: case Html_cite: case Html_address: case Html_var: case Html_dfn: charFormat.setFontItalic(true); break; case Html_big: charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1)); break; case Html_small: charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1)); break; case Html_strong: case Html_b: charFormat.setFontWeight(QFont::Bold); break; case Html_h1: charFormat.setFontWeight(QFont::Bold); charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(3)); margin[QTextHtmlParser::MarginTop] = 18; margin[QTextHtmlParser::MarginBottom] = 12; break; case Html_h2: charFormat.setFontWeight(QFont::Bold); charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(2)); margin[QTextHtmlParser::MarginTop] = 16; margin[QTextHtmlParser::MarginBottom] = 12; break; case Html_h3: charFormat.setFontWeight(QFont::Bold); charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1)); margin[QTextHtmlParser::MarginTop] = 14; margin[QTextHtmlParser::MarginBottom] = 12; break; case Html_h4: charFormat.setFontWeight(QFont::Bold); charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(0)); margin[QTextHtmlParser::MarginTop] = 12; margin[QTextHtmlParser::MarginBottom] = 12; break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -