📄 htmltokenizer.cpp

📁 手机浏览器源码程序,功能强大
💻 CPP
📖 第 1 页 / 共 5 页
字号:
                        dest = buffer;
                        *dest++ = a;
#ifdef TOKEN_DEBUG
                        if (!a || (cBufferPos && *cBuffer == '!'))
                            kdDebug( 6036 ) << "Unknown attribute: *" << QCString(cBuffer, cBufferPos+1).data() << "*" << endl;
                        else
                            kdDebug( 6036 ) << "Known attribute: " << QCString(cBuffer, cBufferPos+1).data() << endl;
#endif

                        tag = SearchEqual;
                        break;
                    }
                }
                // Use tolower() instead of | 0x20 to lowercase the char because there is no
                // performance gain in using | 0x20 since tolower() is optimized and
                // | 0x20 turns characters such as '_' into junk.
                cBuffer[cBufferPos++] = tolower(curchar);
                ++src;
            }
            if ( cBufferPos == CBUFLEN ) {
                cBuffer[cBufferPos] = '\0';
                attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data());
                attrNamePresent = !attrName.isEmpty();
                dest = buffer;
                *dest++ = 0;
                tag = SearchEqual;
            }
            break;
        }
        case SearchEqual:
        {
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
                qDebug("SearchEqual");
#endif
            ushort curchar;
            bool atespace = false;
            while(!src.isEmpty()) {
                curchar = src->unicode();
                // In this mode just ignore any quotes we encounter and treat them like spaces.
                if (curchar > ' ' && curchar != '\'' && curchar != '"') {
                    if(curchar == '=') {
#ifdef TOKEN_DEBUG
                        kdDebug(6036) << "found equal" << endl;
#endif
                        tag = SearchValue;
                        ++src;
                    }
                    else {
#ifdef NOKIA_CHANGES
						// For some reason, RVCT compiler cannot correctly initialize global object DOM::emptyAtom.
                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, AtomicString("") );
#else
                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, emptyAtom );
#endif
                        dest = buffer;
                        tag = SearchAttribute;
                    }
                    break;
                }
                atespace = true;
                ++src;
            }
            break;
        }
        case SearchValue:
        {
            ushort curchar;
            while(!src.isEmpty()) {
                curchar = src->unicode();
                if(curchar > ' ') {
                    if(( curchar == '\'' || curchar == '\"' )) {
                        tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
                        tag = QuotedValue;
                        ++src;
                    } else
                        tag = Value;

                    break;
                }
                ++src;
            }
            break;
        }
        case QuotedValue:
        {
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
                qDebug("QuotedValue");
#endif
            ushort curchar;
            while(!src.isEmpty()) {
                RETURN_IF_OOM( checkBuffer() );

                curchar = src->unicode();
                if (curchar == '>' && !attrNamePresent) {
                    // Handle a case like <img '>.  Just go ahead and be willing
                    // to close the whole tag.  Don't consume the character and
                    // just go back into SearchEnd while ignoring the whole
                    // value.
                    // FIXME: Note that this is actually not a very good solution. It's
                    // an interim hack and doesn't handle the general case of
                    // unmatched quotes among attributes that have names. -dwh
                    while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
                        dest--; // remove trailing newlines
                    AtomicString v(buffer+1, dest-buffer-1);
                    attrName.setUnicode(buffer+1,dest-buffer-1);
                    currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
                    tag = SearchAttribute;
                    dest = buffer;
                    tquote = NoQuote;
                    break;
                }

                if(curchar <= '\'' && !src.escaped()) {
                    // ### attributes like '&{blaa....};' are supposed to be treated as jscript.
                    if ( curchar == '&' )
                    {
                        ++src;
                        parseEntity(src, dest, true);
                        break;
                    }
                    else if ( (tquote == SingleQuote && curchar == '\'') ||
                              (tquote == DoubleQuote && curchar == '\"') )
                    {
                        // some <input type=hidden> rely on trailing spaces. argh
                        while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
                            dest--; // remove trailing newlines
                        AtomicString v(buffer+1, dest-buffer-1);
                        if (!attrNamePresent)
                            attrName.setUnicode(buffer+1,dest-buffer-1);
                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);

                        dest = buffer;
                        tag = SearchAttribute;
                        tquote = NoQuote;
                        ++src;
                        break;
                    }
                }
                *dest = *src;
                fixUpChar(*dest);
                ++dest;
                ++src;
            }
            break;
        }
        case Value:
        {
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
            qDebug("Value");
#endif
            ushort curchar;
            while(!src.isEmpty()) {
                RETURN_IF_OOM( checkBuffer() );
                curchar = src->unicode();
                if(curchar <= '>' && !src.escaped()) {
                    // parse Entities
                    if ( curchar == '&' )
                    {
                        ++src;
                        parseEntity(src, dest, true);
                        break;
                    }
                    // no quotes. Every space means end of value
                    // '/' does not delimit in IE!
                    if ( curchar <= ' ' || curchar == '>' )
                    {
                        AtomicString v(buffer+1, dest-buffer-1);
                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
                        dest = buffer;
                        tag = SearchAttribute;
                        break;
                    }
                }

                *dest = *src;
                fixUpChar(*dest);
                ++dest;
                ++src;
            }
            break;
        }
        case SearchEnd:
        {
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
                qDebug("SearchEnd");
#endif
            while(!src.isEmpty()) {
                if (*src == '>' || *src == '<')
                    break;

                if (*src == '/')
                    currToken.flat = true;

                ++src;
            }
            if (src.isEmpty()) break;

            searchCount = 0; // Stop looking for '<!--' sequence
            tag = NoTag;
            tquote = NoQuote;

            if (*src != '<')
                ++src;

            if ( !currToken.id ) //stop if tag is unknown
                return;

            uint tagID = currToken.id;
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
            kdDebug( 6036 ) << "appending Tag: " << tagID << endl;
#endif
            bool beginTag = !currToken.flat && (tagID <= ID_CLOSE_TAG);

            if (tagID > ID_CLOSE_TAG)
                tagID -= ID_CLOSE_TAG;
            else if (tagID == ID_SCRIPT) {
                AttributeImpl* a = 0;
                bool foundTypeAttribute = false;
                scriptSrc = QString::null;
                scriptSrcCharset = QString::null;
                if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
		     parser->doc()->part() &&
                     parser->doc()->part()->jScriptEnabled() && /* jscript allowed at all? */
                     view /* are we a regular tokenizer or just for innerHTML ? */
                    ) {
                    if ( ( a = currToken.attrs->getAttributeItem( ATTR_SRC ) ) )
                        scriptSrc = parser->doc()->completeURL(parseURL( a->value() ).string() );
                    if ( ( a = currToken.attrs->getAttributeItem( ATTR_CHARSET ) ) )
                        scriptSrcCharset = a->value().string().stripWhiteSpace();
                    if ( scriptSrcCharset.isEmpty() )
                        scriptSrcCharset = parser->doc()->part()->encoding();
                    /* Check type before language, since language is deprecated */
                    if ((a = currToken.attrs->getAttributeItem(ATTR_TYPE)) != 0 && !a->value().string().isEmpty())
                        foundTypeAttribute = true;
                    else
                        a = currToken.attrs->getAttributeItem(ATTR_LANGUAGE);
                }
                javascript = true;

                if( foundTypeAttribute ) {
                    /*
                        Mozilla 1.5 accepts application/x-javascript, and some web references claim it is the only
                        correct variation, but WinIE 6 doesn't accept it.
                        Neither Mozilla 1.5 nor WinIE 6 accept application/javascript, application/ecmascript, or
                        application/x-ecmascript.
                        Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
                        Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
                        Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
                        Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
                        We want to accept all the values that either of these browsers accept, but not other values.
                     */
                    QString type = a->value().string().stripWhiteSpace().lower();
                    if( type.compare("application/x-javascript") != 0 &&
                        type.compare("text/javascript") != 0 &&
                        type.compare("text/javascript1.0") != 0 &&
                        type.compare("text/javascript1.1") != 0 &&
                        type.compare("text/javascript1.2") != 0 &&
                        type.compare("text/javascript1.3") != 0 &&
                        type.compare("text/javascript1.4") != 0 &&
                        type.compare("text/javascript1.5") != 0 &&
                        type.compare("text/jscript") != 0 &&
                        type.compare("text/ecmascript") != 0 &&
                        type.compare("text/livescript") )
                        javascript = false;
                } else if( a ) {
                    /*
                     Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
                     Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
                     Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
                     We want to accept all the values that either of these browsers accept, but not other values.
                     */
                    QString lang = a->value().string();
                    lang = lang.lower();
                    if( lang.compare("") != 0 &&
                        lang.compare("javascript") != 0 &&
                        lang.compare("javascript1.0") != 0 &&
                        lang.compare("javascript1.1") != 0 &&
                        lang.compare("javascript1.2") != 0 &&
                        lang.compare("javascript1.3") != 0 &&
                        lang.compare("javascript1.4") != 0 &&
                        lang.compare("javascript1.5") != 0 &&
                        lang.compare("ecmascript") != 0 &&
                        lang.compare("livescript") != 0 &&
                        lang.compare("jscript") )
                        javascript = false;
                }
            }

            processToken();

            // we have to take care to close the pre block in
            // case we encounter an unallowed element....
            if(pre && beginTag && !DOM::checkChild(ID_PRE, tagID)) {
                kdDebug(6036) << " not allowed in <pre> " << (int)tagID << endl;
                pre = false;
            }

            switch( tagID ) {
            case ID_PRE:
                prePos = 0;
                pre = beginTag;
                break;
            case ID_SCRIPT:
                if (beginTag) {
                    searchStopper = scriptEnd;
                    searchStopperLen = 8;
                    script = true;
                    parseSpecial(src);
                }
                else if (tagID <= ID_CLOSE_TAG) // Handle <script src="foo"/>
                    scriptHandler();
                break;
            case ID_STYLE:
                if (beginTag) {
                    searchStopper = styleEnd;
                    searchStopperLen = 7;
                    style = true;
                    parseSpecial(src);
                }
                break;
            case ID_TEXTAREA:
                if(beginTag) {
                    searchStopper = textareaEnd;
                    searchStopperLen = 10;
                    textarea = true;
                    parseSpecial(src);
                }
                break;
            case ID_TITLE:
                if (beginTag) {
                    searchStopper = titleEnd;
                    searchStopperLen = 7;
                    title = true;
                    parseSpecial(src);
                }
                break;
            case ID_XMP:
                if (beginTag) {
                    searchStopper = xmpEnd;
                    searchStopperLen = 5;
                    xmp = true;
                    parseSpecial(src);
                }
                break;
            case ID_SELECT:
                select = beginTag;
                break;
            case ID_PLAINTEXT:
                plaintext = beginTag;
                break;
            }

            if (beginTag && endTagRequirement(tagID) == FORBIDDEN)
                // Don't discard LFs since this element has no end tag.
                discard = NoneDiscard;

            return; // Finished parsing tag!
        }
        } // end switch
    }
    return;
}

void HTMLTokenizer::addPending()
{
    if ( select && !script )
    {
        *dest++ = ' ';
    }
    else if ( textarea || script )
    {
        switch(pending) {
        case LFPending:  *dest++ = '\n'; prePos = 0; break;
        case SpacePending: *dest++ = ' '; ++prePos; break;
        case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break;
        case NonePending:
            assert(0);
        }
    }
    else
💿 文件大小 1976 K
👤 上传用户 qingriwanxia
📂 所属分类 通讯/手机编程
🏷️ 相关标签

#手机 #浏览器 #源码 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -