⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.cpp

📁 手机浏览器源码程序,功能强大
💻 CPP
📖 第 1 页 / 共 4 页
字号:
            {
                if(current->firstChild() == 0) {
                    e = new HTMLHtmlElementImpl(document);
                    insertNode(e);
                    handled = true;
                }
            }
            else if(current->isInline())
            {
                popInlineBlocks();
                handled = true;
            }
        }

        // if we couldn't handle the error, just rethrow the exception...
        if(!handled)
        {
            //kdDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()" << endl;
            return false;
        }

        return insertNode(n);
    }
}

// Builds the DOM node that corresponds to the start-tag token 't'.
//
// Returns the newly created node, or 0 when no node is to be created for this token
// (for example a second <head>, a <body> after a frameset, a nested <form>, or one
// of the "normally not rendered" tags near the end of the switch).
//
// Besides creating the node, many cases update parser state first: they pop open
// blocks via popBlock(), call startBody(), set discard_until, or update the
// head / form / map / isindex / inSelect / inBody / haveFrameSet members.
//
// Cases that end in 'break', and token ids without a case of their own, fall through
// to the generic createHTMLElement() call at the bottom of the function.
NodeImpl *KHTMLParser::getElement(Token* t)
{
    switch (t->id)
    {
    case ID_HEAD:
        // Only the first <head>, and only directly below <html>, produces a node.
        if (!head && current->id() == ID_HTML) {
            head = new HTMLHeadElementImpl(document);
            return head;
        }
        return 0;
    case ID_BODY:
        // body no longer allowed if we have a frameset
        if (haveFrameSet)
            return 0;
        popBlock(ID_HEAD);
        startBody();
        return new HTMLBodyElementImpl(document);

// frames
    case ID_FRAMESET:
        popBlock(ID_HEAD);
        // A body that has been opened but has no content yet is closed and hidden
        // so the frameset can take its place.
        if (inBody && !haveFrameSet && !haveContent) {
            popBlock(ID_BODY);
            // ### actually for IE document.body returns the now hidden "body" element
            // we can't implement that behaviour now because it could cause too many
            // regressions and the headaches are not worth the work as long as there is
            // no site actually relying on that detail (Dirk)
            const char* str = "display:none";  // RVCT compiler fix;
            if (doc()->body())
                doc()->body()->setAttribute(ATTR_STYLE, str);
            inBody = false;
        }
        // Directly below <html>, a frameset is refused once there is content or
        // another frameset.
        if ((haveContent || haveFrameSet) && current->id() == ID_HTML)
            return 0;
        haveFrameSet = true;
        startBody();
        return new HTMLFrameSetElementImpl(document);
    // a bit of a special case, since the frame is inlined
    case ID_IFRAME:
        // The element itself is still created by the generic path below.
        // NOTE(review): discard_until is presumably consumed elsewhere to skip
        // tokens up to the matching close tag -- confirm against its reader.
        discard_until = ID_IFRAME + ID_CLOSE_TAG;
        break;

// form elements
    case ID_FORM:
        // Only create a new form if we're not already inside one.
        // This is consistent with other browsers' behavior.
        if (form)
            return 0;
        form = new HTMLFormElementImpl(document);
        return form;
    // The form controls below are constructed with the currently open form
    // (which may be 0).
    case ID_BUTTON:
        return new HTMLButtonElementImpl(document, form);
    case ID_FIELDSET:
        return new HTMLFieldSetElementImpl(document, form);
    case ID_INPUT:
        return new HTMLInputElementImpl(document, form);
    case ID_ISINDEX: {
        NodeImpl *n = handleIsindex(t);
        // Outside the body the node is not returned for insertion; it is stored in
        // 'isindex' (dropping the reference to any previously stored node) and 0 is
        // returned instead.
        // NOTE(review): n is dereferenced without a null check -- confirm that
        // handleIsindex() never returns 0.
        if (!inBody) {
            if (isindex)
                isindex->deref();
            isindex = n;
            isindex->ref();
            return 0;
        }
        t->flat = true;
        return n;
    }
    case ID_KEYGEN:
        return new HTMLKeygenElementImpl(document, form);
    case ID_LEGEND:
        return new HTMLLegendElementImpl(document, form);
    case ID_OPTGROUP:
        return new HTMLOptGroupElementImpl(document, form);
    case ID_OPTION:
        return new HTMLOptionElementImpl(document, form);
    case ID_SELECT:
        // inSelect is cleared again in processCloseTag() when </select> is seen.
        inSelect = true;
        return new HTMLSelectElementImpl(document, form);
    case ID_TEXTAREA:
        return new HTMLTextAreaElementImpl(document, form);

// lists
    // A new list item pops any open item of the same family before the generic
    // element is created.
    case ID_DD:
        popBlock(ID_DT);
        popBlock(ID_DD);
        break;
    case ID_DT:
        popBlock(ID_DD);
        popBlock(ID_DT);
        break;
    case ID_LI:
        popBlock(ID_LI);
        break;

// anchor
    case ID_A:
        // Never allow nested <a>s.
        popBlock(ID_A);
        break;

// images
    case ID_IMG:
        return new HTMLImageElementImpl(document, form);
    case ID_MAP:
        // 'map' is reset to 0 in processCloseTag() when </map> is seen.
        map = new HTMLMapElementImpl(document);
        return map;

// tables
    // A new row, cell or row group pops any open sibling of the same kind.
    case ID_TR:
        popBlock(ID_TR);
        break;
    case ID_TD:
    case ID_TH:
        popBlock(ID_TH);
        popBlock(ID_TD);
        break;
    case ID_TBODY:
    case ID_THEAD:
    case ID_TFOOT:
        popBlock(ID_THEAD);
        popBlock(ID_TBODY);
        popBlock(ID_TFOOT);
        break;

// elements with no special representation in the DOM
    case ID_TT:
    case ID_U:
    case ID_B:
    case ID_I:
    case ID_S:
    case ID_STRIKE:
    case ID_BIG:
    case ID_SMALL:
        // Drop the tag once allowNestedRedundantTag() reports the nesting limit
        // for this id has been reached.
        if (!allowNestedRedundantTag(t->id))
            return 0;
        break;

    case ID_NOBR:
    case ID_WBR:
        popBlock(t->id); // Don't allow nested <nobr> or <wbr>
        break;

// these are special, and normally not rendered
    case ID_NOEMBED:
        discard_until = ID_NOEMBED + ID_CLOSE_TAG;
        return 0;
    case ID_NOFRAMES:
        discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
        return 0;
    case ID_NOSCRIPT:
        // No node is created for <noscript> either way; discard_until is only set
        // when a widget exists and its part reports scripting as enabled.
        if (HTMLWidget && HTMLWidget->part()->jScriptEnabled())
            discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
        return 0;
    case ID_NOLAYER:
        //discard_until = ID_NOLAYER + ID_CLOSE_TAG;
        return 0;
    case ID_TEXT:
        return new TextImpl(document, t->text);
    case ID_COMMENT:
        // Comments only become nodes (via the generic path) when the parser is
        // configured to include them in the DOM.
        if (!includesCommentsInDOM)
            return 0;
        break;
    }

    // Generic path: every tag that did not return above gets a plain HTML element.
    return document->document()->createHTMLElement(t->id);
}

#define MAX_REDUNDANT 20

bool KHTMLParser::allowNestedRedundantTag(int _id)
{
    // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
    // about 1500 tags, all from a bunch of <b>s.  We will only allow at most 20
    // nested tags of the same type before just ignoring them all together.
    int i = 0;
    for (HTMLStackElem* curr = blockStack;
         i < MAX_REDUNDANT && curr && curr->id == _id;
         curr = curr->next, i++);
    return i != MAX_REDUNDANT;
}

// Handles a close-tag token: updates the parser state tied to certain tags and
// then pops the matching block from the block stack.
//
// Written to tolerate badly broken HTML:
//  - </html> and </body> are ignored here.
//  - a stray </p> (one that pops nothing) is turned into an empty paragraph.
// Note that in the stray </p> case t->id is rewritten to the open-tag id and is
// not restored.
void KHTMLParser::processCloseTag(Token *t)
{
    // Some pages close the body before the real end of the document, so these two
    // close tags are never honoured; the end() call is relied on to close things.
    const bool closesDocument =
        t->id == ID_HTML+ID_CLOSE_TAG || t->id == ID_BODY+ID_CLOSE_TAG;
    if (closesDocument)
        return;

    // Leaving a form, map or select resets the corresponding parser member.
    if (t->id == ID_FORM+ID_CLOSE_TAG)
        form = 0;
    else if (t->id == ID_MAP+ID_CLOSE_TAG)
        map = 0;
    else if (t->id == ID_SELECT+ID_CLOSE_TAG)
        inSelect = false;

#ifdef PARSER_DEBUG
    kdDebug( 6035 ) << "added the following childs to " << current->nodeName().string() << endl;
    for (NodeImpl *kid = current->firstChild(); kid != 0; kid = kid->nextSibling())
        kdDebug( 6035 ) << "    " << kid->nodeName().string() << endl;
#endif

    // Remember the stack top so we can tell whether popBlock() removed anything.
    HTMLStackElem* const stackTopBefore = blockStack;
    popBlock(t->id-ID_CLOSE_TAG);
    const bool nothingPopped = (stackTopBefore == blockStack);
    if (nothingPopped && t->id == ID_P+ID_CLOSE_TAG) {
        // Stray </p>: Gecko, WinIE and MacIE all treat it as a valid break,
        // i.e. <p></p>, so feed the token back in as an opening <p> and close
        // it again straight away.
        t->id -= ID_CLOSE_TAG;
        parseToken(t);
        popBlock(ID_P);
    }
#ifdef PARSER_DEBUG
    kdDebug( 6035 ) << "closeTag --> current = " << current->nodeName().string() << endl;
#endif
}

// Returns true when '_id' is one of the six heading tag ids (H1 through H6).
bool KHTMLParser::isHeaderTag(int _id)
{
    return _id == ID_H1
        || _id == ID_H2
        || _id == ID_H3
        || _id == ID_H4
        || _id == ID_H5
        || _id == ID_H6;
}

void KHTMLParser::popNestedHeaderTag()
{
    // This function only cares about checking for nested headers that have only inlines in between them.
    NodeImpl* currNode = current;
    for (HTMLStackElem* curr = blockStack; curr; curr = curr->next) {
        if (isHeaderTag(curr->id)) {
            popBlock(curr->id);
            return;
        }
        if (currNode && !currNode->isInline())
            return;
        currNode = curr->node;
    }
}

// Returns true when '_id' is one of the tag ids this parser treats as a
// "residual style" tag (anchor, font and the inline formatting tags listed below).
bool KHTMLParser::isResidualStyleTag(int _id)
{
    static const int residualStyleIds[] = {
        ID_A,    ID_FONT,   ID_TT,  ID_U,     ID_B,  ID_I,
        ID_S,    ID_STRIKE, ID_BIG, ID_SMALL, ID_EM, ID_STRONG,
        ID_DFN,  ID_CODE,   ID_SAMP, ID_KBD,  ID_VAR
    };
    const unsigned count = sizeof(residualStyleIds) / sizeof(residualStyleIds[0]);

    for (unsigned i = 0; i < count; ++i) {
        if (residualStyleIds[i] == _id)
            return true;
    }
    return false;
}

// Returns true when '_id' is either a residual style tag itself (see
// isResidualStyleTag) or one of the block-level tag ids listed below.
bool KHTMLParser::isAffectedByResidualStyle(int _id)
{
    // Short-circuit evaluation keeps the residual-style check first.
    return isResidualStyleTag(_id)
        || _id == ID_P
        || _id == ID_DIV
        || _id == ID_BLOCKQUOTE
        || _id == ID_ADDRESS
        || _id == ID_H1
        || _id == ID_H2
        || _id == ID_H3
        || _id == ID_H4
        || _id == ID_H5
        || _id == ID_H6
        || _id == ID_CENTER
        || _id == ID_UL
        || _id == ID_OL
        || _id == ID_LI
        || _id == ID_DL
        || _id == ID_DT
        || _id == ID_DD
        || _id == ID_PRE
        || _id == ID_FORM;
}

void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
{
    // Find the element that crosses over to a higher level.   For now, if there is more than
    // one, we will just give up and not attempt any sort of correction.  It's highly unlikely that
    // there will be more than one, since <p> tags aren't allowed to be nested.
    int exceptionCode = 0;
    HTMLStackElem* curr = blockStack;
    HTMLStackElem* maxElem = 0;
    HTMLStackElem* prev = 0;
    HTMLStackElem* prevMaxElem = 0;
    while (curr && curr != elem) {
        if (curr->level > elem->level) {
            if (maxElem)
                return;
            maxElem = curr;
            prevMaxElem = prev;
        }

        prev = curr;
        curr = curr->next;
    }

    if (!curr || !maxElem || !isAffectedByResidualStyle(maxElem->id)) return;

    NodeImpl* residualElem = prev->node;
    NodeImpl* blockElem = prevMaxElem ? prevMaxElem->node : current;
    NodeImpl* parentElem = elem->node;

    // Check to see if the reparenting that is going to occur is allowed according to the DOM.
    // FIXME: We should either always allow it or perform an additional fixup instead of
    // just bailing here.
    // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
    if (!parentElem->childAllowed(blockElem))
        return;

    if (maxElem->node->parentNode() != elem->node) {
        // Walk the stack and remove any elements that aren't residual style tags.  These
        // are basically just being closed up.  Example:
        // <font><span>Moo<p>Goo</font></p>.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -