⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmltokenizer.cpp

📁 konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版本源码包.
💻 CPP
📖 第 1 页 / 共 4 页
字号:
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1                qDebug("AttributeName");#endif            ushort curchar;            int ll = kMin(src.length(), CBUFLEN-cBufferPos);            while(ll--) {                curchar = *src;                if(curchar <= '>') {                    if(curchar <= ' ' || curchar == '=' || curchar == '>') {                        unsigned int a;                        cBuffer[cBufferPos] = '\0';                        a = khtml::getAttrID(cBuffer, cBufferPos);                        if ( !a )                            attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data());                        dest = buffer;                        *dest++ = a;#ifdef TOKEN_DEBUG                        if (!a || (cBufferPos && *cBuffer == '!'))                            kdDebug( 6036 ) << "Unknown attribute: *" << QCString(cBuffer, cBufferPos+1).data() << "*" << endl;                        else                            kdDebug( 6036 ) << "Known attribute: " << QCString(cBuffer, cBufferPos+1).data() << endl;#endif                        // did we just get />                        if (!a && cBufferPos == 1 && *cBuffer == '/' && curchar == '>')                            currToken.flat = true;                        tag = SearchEqual;                        break;                    }                }                cBuffer[cBufferPos++] =                     (  curchar >= 'A' && curchar <= 'Z' ) ? curchar | 0x20 : curchar;                ++src;            }            if ( cBufferPos == CBUFLEN ) {                cBuffer[cBufferPos] = '\0';                attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data());                dest = buffer;                *dest++ = 0;                tag = SearchEqual;            }            break;        }        case SearchEqual:        {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1                qDebug("SearchEqual");#endif            ushort curchar;            bool atespace = false;            while(!src.isEmpty()) {                curchar = src->unicode();                if(curchar > ' ') {                    if(curchar == '=') {#ifdef TOKEN_DEBUG                        kdDebug(6036) << "found equal" << endl;#endif                        tag = SearchValue;                        ++src;                    }                    else if(atespace && (curchar == '\'' || curchar == '"'))                    {                        tag = SearchValue;                        *dest++ = 0;                        attrName = QString::null;                    }                    else {                        DOMString v("");                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);                        dest = buffer;                        tag = SearchAttribute;                    }                    break;                }                atespace = true;                ++src;            }            break;        }        case SearchValue:        {            ushort curchar;            while(!src.isEmpty()) {                curchar = src->unicode();                if(curchar > ' ') {                    if(( curchar == '\'' || curchar == '\"' )) {                        tquote = curchar == '\"' ? DoubleQuote : SingleQuote;                        tag = QuotedValue;                        ++src;                    } else                        tag = Value;                    break;                }                ++src;            }            break;        }        case QuotedValue:        {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1                qDebug("QuotedValue");#endif            ushort curchar;            while(!src.isEmpty()) {                checkBuffer();                curchar = src->unicode();                if(curchar <= '\'' && !src.escaped()) {                    // ### attributes like '&{blaa....};' are supposed to be treated as jscript.                    if ( curchar == '&' )                    {                        ++src;                        parseEntity(src, dest, true);                        break;                    }                    else if ( (tquote == SingleQuote && curchar == '\'') ||                              (tquote == DoubleQuote && curchar == '\"') )                    {                        // some <input type=hidden> rely on trailing spaces. argh                        while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))                            dest--; // remove trailing newlines                        DOMString v(buffer+1, dest-buffer-1);                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);                        dest = buffer;                        tag = SearchAttribute;                        tquote = NoQuote;                        ++src;                        break;                    }                }                *dest++ = *src;                ++src;            }            break;        }        case Value:        {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1            qDebug("Value");#endif            ushort curchar;            while(!src.isEmpty()) {                checkBuffer();                curchar = src->unicode();                if(curchar <= '>' && !src.escaped()) {                    // parse Entities                    if ( curchar == '&' )                    {                        ++src;                        parseEntity(src, dest, true);                        break;                    }                    // no quotes. Every space means end of value                    // '/' does not delimit in IE!                    if ( curchar <= ' ' || curchar == '>' )                    {                        DOMString v(buffer+1, dest-buffer-1);                        currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);                        dest = buffer;                        tag = SearchAttribute;                        break;                    }                }                *dest++ = *src;                ++src;            }            break;        }        case SearchEnd:        {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1                qDebug("SearchEnd");#endif            while(!src.isEmpty()) {                if(*src == '<' || *src == '>')                    break;                if (*src == '/')                    currToken.flat = true;                ++src;            }            if(src.isEmpty() && *src != '<' && *src != '>') break;            searchCount = 0; // Stop looking for '<!--' sequence            tag = NoTag;            tquote = NoQuote;            if ( *src == '>' )                ++src;            if ( !currToken.tid ) //stop if tag is unknown                return;            uint tagID = currToken.tid;#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0            kdDebug( 6036 ) << "appending Tag: " << tagID << endl;#endif            // If the tag requires an end tag it cannot be flat,            // unless we are using the HTML parser to parse XHTML            // The only exception is SCRIPT and priority 0 tokens.            if (tagID < ID_CLOSE_TAG && tagID != ID_SCRIPT &&                DOM::endTag[tagID] == DOM::REQUIRED &&                parser->doc()->htmlMode() != DocumentImpl::XHtml)                currToken.flat = false;            bool beginTag = !currToken.flat && (tagID < ID_CLOSE_TAG);            if(tagID >= ID_CLOSE_TAG)                tagID -= ID_CLOSE_TAG;            else if ( !brokenScript && tagID == ID_SCRIPT ) {                DOMStringImpl* a = 0;                bool foundTypeAttribute = false;                scriptSrc = scriptSrcCharset = QString::null;                if ( currToken.attrs && /* potentially have a ATTR_SRC ? */                     view &&  /* are we a regular tokenizer or just for innerHTML ? */                     parser->doc()->view()->part()->jScriptEnabled() /* jscript allowed at all? */                    ) {                    if ( ( a = currToken.attrs->getValue( ATTR_SRC ) ) )                        scriptSrc = parser->doc()->completeURL(khtml::parseURL( DOMString(a) ).string() );                    if ( ( a = currToken.attrs->getValue( ATTR_CHARSET ) ) )                        scriptSrcCharset = DOMString(a).string().stripWhiteSpace();                    if ( scriptSrcCharset.isEmpty() && view)                        scriptSrcCharset = parser->doc()->view()->part()->encoding();                    /* Check type before language, since language is deprecated */                    if ((a = currToken.attrs->getValue(ATTR_TYPE)) != 0 && !DOMString(a).string().isEmpty())                        foundTypeAttribute = true;                    else                        a = currToken.attrs->getValue(ATTR_LANGUAGE);                }                javascript = true;                if( foundTypeAttribute ) {                    /*                        Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.                        Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.			Mozilla 1.5 accepts application/x-javascript, WinIE 6 doesn't.                        Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.                        Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.                        We want to accept all the values that either of these browsers accept, but not other values.                     */                    QString type = DOMString(a).string().stripWhiteSpace().lower();                    if( type.compare("text/javascript") != 0 &&                        type.compare("text/javascript1.0") != 0 &&                        type.compare("text/javascript1.1") != 0 &&                        type.compare("text/javascript1.2") != 0 &&                        type.compare("text/javascript1.3") != 0 &&                        type.compare("text/javascript1.4") != 0 &&                        type.compare("text/javascript1.5") != 0 &&                        type.compare("text/jscript") != 0 &&                        type.compare("text/ecmascript") != 0 &&                        type.compare("text/livescript") != 0 &&			type.compare("application/x-javascript") != 0 &&			type.compare("application/ecmascript") != 0 )                        javascript = false;                } else if( a ) {                    /*                     Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.                     Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.                     Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.                     We want to accept all the values that either of these browsers accept, but not other values.                     */                    QString lang = DOMString(a).string();                    lang = lang.lower();                    if( lang.compare("") != 0 &&                        lang.compare("javascript") != 0 &&                        lang.compare("javascript1.0") != 0 &&                        lang.compare("javascript1.1") != 0 &&                        lang.compare("javascript1.2") != 0 &&                        lang.compare("javascript1.3") != 0 &&                        lang.compare("javascript1.4") != 0 &&                        lang.compare("javascript1.5") != 0 &&                        lang.compare("ecmascript") != 0 &&                        lang.compare("livescript") != 0 &&                        lang.compare("jscript") )                        javascript = false;                }            }            processToken();            if ( parser->selectMode() && beginTag)                discard = AllDiscard;            switch( tagID ) {            case ID_PRE:                pre = beginTag;                if (beginTag)                    discard = LFDiscard;                prePos = 0;                break;            case ID_BR:                prePos = 0;                break;            case ID_SCRIPT:                if (beginTag) {                    searchStopper = scriptEnd;                    searchStopperLen = 8;                    script = true;                    parseSpecial(src);                }                else if (tagID < ID_CLOSE_TAG) // Handle <script src="foo"/>                    scriptHandler();                break;            case ID_STYLE:                if (beginTag) {                    searchStopper = styleEnd;                    searchStopperLen = 7;                    style = true;                    parseSpecial(src);                }                break;            case ID_TEXTAREA:                if(beginTag) {                    searchStopper = textareaEnd;                    searchStopperLen = 10;                    textarea = true;                    discard = NoneDiscard;                    parseSpecial(src);                }                break;            case ID_TITLE:                if (beginTag) {                    searchStopper = titleEnd;                    searchStopperLen = 7;                    title = true;                    parseSpecial(src);                }                break;            case ID_XMP:                if (beginTag) {                    searchStopper = xmpEnd;                    searchStopperLen = 5;                    xmp = true;                    parseSpecial(src);                }                break;            case ID_SELECT:                select = beginTag;                break;            case ID_PLAINTEXT:                plaintext = beginTag;                break;            }            return; // Finished parsing tag!        }        } // end switch    }    return;}void HTMLTokenizer::addPending(){    if ( select && !(comment || script))    {        *dest++ = ' ';    }    else if ( textarea )    {        switch(pending) {        case LFPending:  *dest++ = '\n'; prePos = 0; break;        case SpacePending: *dest++ = ' '; ++prePos; break;        case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break;        case NonePending:            assert(0);        }    }    else    {        int p;        switch (pending)        {        case SpacePending:            // Insert a breaking space            *dest++ = QChar(' ');            prePos++;            break;        case LFPending:            *dest = '\n';            dest++;            prePos = 0;            break;        case TabPending:            p = TAB_SIZE - ( prePos % TAB_SIZE );            for ( int x = 0; x < p; x++ )                *dest++ = QChar(' ');            prePos += p;            break;        case NonePending:            assert(0);            break;        }    }    pending = NonePending;}void HTMLTokenizer::write( const TokenizerString &str, bool appendData ){#ifdef TOKEN_DEBUG    kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;#endif    if ( !buffer )        return;    if ( ( m_executingScript && appendData ) ||         ( !m_executingScript && cachedScript.count() ) ) {        // don't parse; we will do this later        pendingSrc.append(str);        return;    }    if ( onHold ) {        src.append(str);        return;    }    setSrc(str);    m_abort = false;//     if (Entity)//         parseEntity(src, dest);    while ( !src.isEmpty() )    {        if ( m_abort )            return;        // do we need to enlarge the buffer?        checkBuffer();        ushort cc = src->unicode();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -