📄 htmlparser.cpp
字号:
{
if(current->firstChild() == 0) {
e = new HTMLHtmlElementImpl(document);
insertNode(e);
handled = true;
}
}
else if(current->isInline())
{
popInlineBlocks();
handled = true;
}
}
// if we couldn't handle the error, just rethrow the exception...
if(!handled)
{
//kdDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()" << endl;
return false;
}
return insertNode(n);
}
}
// Builds the DOM node that corresponds to the opening token `t`.
//
// Besides creating the node, each case performs the parser-state bookkeeping
// that the tag requires: closing still-open blocks via popBlock(), recording
// the current head/form/map element, toggling flags such as inSelect and
// haveFrameSet, or setting discard_until.
//
// Returns the new node, or 0 when no node is to be created for this token.
// Tags without a dedicated case (and cases that end in `break`) fall through
// to the generic createHTMLElement() factory at the bottom.
NodeImpl *KHTMLParser::getElement(Token* t)
{
    switch (t->id)
    {
    // --- document structure ---
    case ID_HEAD:
        // A head element is only created once, and only while the current
        // node is <html>.
        if (head || current->id() != ID_HTML)
            return 0;
        head = new HTMLHeadElementImpl(document);
        return head;

    case ID_BODY:
        // Once a frameset has been seen, <body> is refused.
        if (!haveFrameSet) {
            popBlock(ID_HEAD);
            startBody();
            return new HTMLBodyElementImpl(document);
        }
        return 0;

    // --- frames ---
    case ID_FRAMESET:
        popBlock(ID_HEAD);
        if (inBody && !haveFrameSet && !haveContent) {
            // A body was opened but holds no content yet: close it and hide it
            // so the frameset can take its place.
            popBlock(ID_BODY);
            // ### For IE, document.body actually returns the now hidden "body"
            // element. That behaviour is not implemented because it could cause
            // too many regressions, and no site is known to rely on it (Dirk).
            const char* hiddenStyle = "display:none"; // named local: RVCT compiler fix
            if (doc()->body())
                doc()->body()->setAttribute(ATTR_STYLE, hiddenStyle);
            inBody = false;
        }
        if ((haveContent || haveFrameSet) && current->id() == ID_HTML)
            return 0;
        haveFrameSet = true;
        startBody();
        return new HTMLFrameSetElementImpl(document);

    case ID_IFRAME:
        // A bit of a special case, since the frame is inlined: set the
        // discard marker to the matching </iframe>, then create the element
        // through the generic factory below.
        discard_until = ID_IFRAME + ID_CLOSE_TAG;
        break;

    // --- form elements ---
    case ID_FORM:
        // Forms do not nest: a <form> inside an open form yields no node.
        // This is consistent with other browsers' behavior.
        if (!form) {
            form = new HTMLFormElementImpl(document);
            return form;
        }
        return 0;

    case ID_BUTTON:
        return new HTMLButtonElementImpl(document, form);

    case ID_FIELDSET:
        return new HTMLFieldSetElementImpl(document, form);

    case ID_INPUT:
        return new HTMLInputElementImpl(document, form);

    case ID_ISINDEX: {
        NodeImpl *replacement = handleIsindex(t);
        if (inBody) {
            t->flat = true;
            return replacement;
        }
        // Not in the body yet: keep a referenced pointer to the node in
        // `isindex` (releasing any previously stored one) and emit nothing.
        if (isindex)
            isindex->deref();
        isindex = replacement;
        isindex->ref();
        return 0;
    }

    case ID_KEYGEN:
        return new HTMLKeygenElementImpl(document, form);

    case ID_LEGEND:
        return new HTMLLegendElementImpl(document, form);

    case ID_OPTGROUP:
        return new HTMLOptGroupElementImpl(document, form);

    case ID_OPTION:
        return new HTMLOptionElementImpl(document, form);

    case ID_SELECT:
        inSelect = true;
        return new HTMLSelectElementImpl(document, form);

    case ID_TEXTAREA:
        return new HTMLTextAreaElementImpl(document, form);

    // --- definition lists: a new term/definition closes the open ones ---
    case ID_DD:
        popBlock(ID_DT);
        popBlock(ID_DD);
        break;

    case ID_DT:
        popBlock(ID_DD);
        popBlock(ID_DT);
        break;

    // --- tags that must not nest inside an open tag of the same kind ---
    // (list items, anchors, table rows, <nobr> and <wbr>): pop the open one
    // of the same id before the new element is created.
    case ID_LI:
    case ID_A:
    case ID_TR:
    case ID_NOBR:
    case ID_WBR:
        popBlock(t->id);
        break;

    // --- images ---
    case ID_IMG:
        return new HTMLImageElementImpl(document, form);

    case ID_MAP:
        map = new HTMLMapElementImpl(document);
        return map;

    // --- tables ---
    case ID_TD:
    case ID_TH:
        popBlock(ID_TH);
        popBlock(ID_TD);
        break;

    case ID_TBODY:
    case ID_THEAD:
    case ID_TFOOT:
        popBlock(ID_THEAD);
        popBlock(ID_TBODY);
        popBlock(ID_TFOOT);
        break;

    // --- elements with no special representation in the DOM ---
    case ID_TT:
    case ID_U:
    case ID_B:
    case ID_I:
    case ID_S:
    case ID_STRIKE:
    case ID_BIG:
    case ID_SMALL:
        // Refuse the tag once the redundant-nesting limit has been reached.
        if (!allowNestedRedundantTag(t->id))
            return 0;
        break;

    // --- these are special, and normally not rendered ---
    case ID_NOEMBED:
        discard_until = ID_NOEMBED + ID_CLOSE_TAG;
        return 0;

    case ID_NOFRAMES:
        discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
        return 0;

    case ID_NOSCRIPT:
        // The discard marker is only set when a widget exists and its part
        // reports scripting as enabled; no node is created either way.
        if (HTMLWidget && HTMLWidget->part()->jScriptEnabled())
            discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
        return 0;

    case ID_NOLAYER:
        //discard_until = ID_NOLAYER + ID_CLOSE_TAG;
        return 0;

    // --- character data ---
    case ID_TEXT:
        return new TextImpl(document, t->text);

    case ID_COMMENT:
        if (!includesCommentsInDOM)
            return 0;
        break;
    }

    // Everything that reached this point gets a generic element for its id.
    return document->document()->createHTMLElement(t->id);
}
#define MAX_REDUNDANT 20
bool KHTMLParser::allowNestedRedundantTag(int _id)
{
// www.liceo.edu.mx is an example of a site that achieves a level of nesting of
// about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
// nested tags of the same type before just ignoring them all together.
int i = 0;
for (HTMLStackElem* curr = blockStack;
i < MAX_REDUNDANT && curr && curr->id == _id;
curr = curr->next, i++);
return i != MAX_REDUNDANT;
}
// Handles a closing-tag token: updates the parser state tied to the element
// being closed, then pops the matching block off the block stack.
//
// Contains workarounds for really broken HTML (lars).
void KHTMLParser::processCloseTag(Token *t)
{
    // </html> and </body> are ignored on purpose: some pages close them before
    // the actual end of the document, so closing is left to the end() call.
    if (t->id == ID_HTML+ID_CLOSE_TAG || t->id == ID_BODY+ID_CLOSE_TAG)
        return;

    // Clear the "currently inside ..." state for the element being closed.
    if (t->id == ID_FORM+ID_CLOSE_TAG)
        form = 0;
    else if (t->id == ID_MAP+ID_CLOSE_TAG)
        map = 0;
    else if (t->id == ID_SELECT+ID_CLOSE_TAG)
        inSelect = false;

#ifdef PARSER_DEBUG
    kdDebug( 6035 ) << "added the following childs to " << current->nodeName().string() << endl;
    for (NodeImpl *kid = current->firstChild(); kid != 0; kid = kid->nextSibling())
        kdDebug( 6035 ) << " " << kid->nodeName().string() << endl;
#endif

    // Remember the stack top so we can tell whether popBlock() removed anything.
    HTMLStackElem* stackTopBefore = blockStack;
    popBlock(t->id-ID_CLOSE_TAG);

    if (stackTopBefore == blockStack && t->id == ID_P+ID_CLOSE_TAG) {
        // Stray </p> with no open paragraph. Gecko, WinIE and MacIE all treat
        // it as a valid break, i.e. as <p></p>, so synthesize an empty
        // paragraph: turn the token into an opening <p>, parse it, close it.
        t->id-=ID_CLOSE_TAG;
        parseToken(t);
        popBlock(ID_P);
    }

#ifdef PARSER_DEBUG
    kdDebug( 6035 ) << "closeTag --> current = " << current->nodeName().string() << endl;
#endif
}
// Returns true when `_id` is one of the six heading tags (H1 through H6).
bool KHTMLParser::isHeaderTag(int _id)
{
    return _id == ID_H1
        || _id == ID_H2
        || _id == ID_H3
        || _id == ID_H4
        || _id == ID_H5
        || _id == ID_H6;
}
void KHTMLParser::popNestedHeaderTag()
{
// This function only cares about checking for nested headers that have only inlines in between them.
NodeImpl* currNode = current;
for (HTMLStackElem* curr = blockStack; curr; curr = curr->next) {
if (isHeaderTag(curr->id)) {
popBlock(curr->id);
return;
}
if (currNode && !currNode->isInline())
return;
currNode = curr->node;
}
}
// Returns true when `_id` names one of the tags this parser treats as
// "residual style" tags: anchors, <font>, the font-style tags and the
// phrase tags listed below.
bool KHTMLParser::isResidualStyleTag(int _id)
{
    // Anchor and font markup.
    if (_id == ID_A || _id == ID_FONT)
        return true;

    // Font-style tags.
    if (_id == ID_TT || _id == ID_U || _id == ID_B || _id == ID_I
        || _id == ID_S || _id == ID_STRIKE || _id == ID_BIG || _id == ID_SMALL)
        return true;

    // Phrase tags.
    return _id == ID_EM || _id == ID_STRONG || _id == ID_DFN || _id == ID_CODE
        || _id == ID_SAMP || _id == ID_KBD || _id == ID_VAR;
}
// Returns true when `_id` is either a residual style tag itself (see
// isResidualStyleTag) or one of the block-level tags listed below.
bool KHTMLParser::isAffectedByResidualStyle(int _id)
{
    if (isResidualStyleTag(_id))
        return true;

    // Generic text blocks.
    if (_id == ID_P || _id == ID_DIV || _id == ID_BLOCKQUOTE
        || _id == ID_ADDRESS || _id == ID_CENTER || _id == ID_PRE)
        return true;

    // Headings.
    if (_id == ID_H1 || _id == ID_H2 || _id == ID_H3
        || _id == ID_H4 || _id == ID_H5 || _id == ID_H6)
        return true;

    // Lists and their items, plus forms.
    return _id == ID_UL || _id == ID_OL || _id == ID_LI
        || _id == ID_DL || _id == ID_DT || _id == ID_DD
        || _id == ID_FORM;
}
void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
{
// Find the element that crosses over to a higher level. For now, if there is more than
// one, we will just give up and not attempt any sort of correction. It's highly unlikely that
// there will be more than one, since <p> tags aren't allowed to be nested.
int exceptionCode = 0;
HTMLStackElem* curr = blockStack;
HTMLStackElem* maxElem = 0;
HTMLStackElem* prev = 0;
HTMLStackElem* prevMaxElem = 0;
while (curr && curr != elem) {
if (curr->level > elem->level) {
if (maxElem)
return;
maxElem = curr;
prevMaxElem = prev;
}
prev = curr;
curr = curr->next;
}
if (!curr || !maxElem || !isAffectedByResidualStyle(maxElem->id)) return;
NodeImpl* residualElem = prev->node;
NodeImpl* blockElem = prevMaxElem ? prevMaxElem->node : current;
NodeImpl* parentElem = elem->node;
// Check to see if the reparenting that is going to occur is allowed according to the DOM.
// FIXME: We should either always allow it or perform an additional fixup instead of
// just bailing here.
// Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
if (!parentElem->childAllowed(blockElem))
return;
if (maxElem->node->parentNode() != elem->node) {
// Walk the stack and remove any elements that aren't residual style tags. These
// are basically just being closed up. Example:
// <font><span>Moo<p>Goo</font></p>.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -