📄 htmltokenizer.cpp

📁 khtml在gtk上的移植版本
💻 CPP
📖 第 1 页 / 共 4 页
字号:
            scriptCodeSize = scriptCodeDest-scriptCode;        }        else {            scriptCode[scriptCodeSize] = *src;            fixUpChar(scriptCode[scriptCodeSize]);            ++scriptCodeSize;            ++src;        }    }}void HTMLTokenizer::scriptHandler(){    // We are inside a <script>    bool doScriptExec = false;    CachedScript* cs = 0;    // don't load external scripts for standalone documents (for now)    if (!scriptSrc.isEmpty() && parser->doc()->part()) {        // forget what we just got; load from src url instead        if ( !parser->skipMode() ) {            if ( (cs = parser->doc()->docLoader()->requestScript(scriptSrc, scriptSrcCharset) ))                cachedScript.enqueue(cs);        }        scriptSrc=QString::null;    }    else {#ifdef TOKEN_DEBUG        kdDebug( 6036 ) << "---START SCRIPT---" << endl;        kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl;        kdDebug( 6036 ) << "---END SCRIPT---" << endl;#endif        // Parse scriptCode containing <script> info        doScriptExec = true;    }    processListing(TokenizerString(scriptCode, scriptCodeSize));    QString exScript( buffer, dest-buffer );    processToken();    currToken.id = ID_SCRIPT + ID_CLOSE_TAG;    processToken();    TokenizerString prependingSrc;    if ( !parser->skipMode() ) {        if (cs) {             //kdDebug( 6036 ) << "cachedscript extern!" << endl;             //kdDebug( 6036 ) << "src: *" << QString( src.current(), src.length() ).latin1() << "*" << endl;             //kdDebug( 6036 ) << "pending: *" << pendingSrc.latin1() << "*" << endl;            pendingSrc.prepend(src);            setSrc(TokenizerString());            scriptCodeSize = scriptCodeResync = 0;            cs->ref(this);            // will be 0 if script was already loaded and ref() executed it            if (cachedScript.count())                loadingExtScript = true;        }        else if (view && doScriptExec && javascript ) {            if (!m_executingScript)                pendingSrc.prepend(src);            else                prependingSrc = src;            setSrc(TokenizerString());            scriptCodeSize = scriptCodeResync = 0;            //QTime dt;            //dt.start();            scriptExecution( exScript, QString::null, scriptStartLineno );	    //kdDebug( 6036 ) << "script execution time:" << dt.elapsed() << endl;        }    }    script = false;    scriptCodeSize = scriptCodeResync = 0;    if ( !m_executingScript && !loadingExtScript ) {	// kdDebug( 6036 ) << "adding pending Output to parsed string" << endl;	src.append(pendingSrc);	pendingSrc.clear();    } else if (!prependingSrc.isEmpty())        write(prependingSrc, false);}void HTMLTokenizer::scriptExecution( const QString& str, QString scriptURL,                                     int baseLine){#if APPLE_CHANGES    if (!view || !view->part())        return;#endif    bool oldscript = script;    m_executingScript++;    script = false;    QString url;        if (scriptURL.isNull())      url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL();    else      url = scriptURL;    view->part()->executeScript(url,baseLine,Node(),str);    m_executingScript--;    script = oldscript;}void HTMLTokenizer::parseComment(TokenizerString &src){    checkScriptBuffer(src.length());    while ( !src.isEmpty() ) {        scriptCode[ scriptCodeSize++ ] = *src;#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1        qDebug("comment is now: *%s*",               QConstString((QChar*)src.current(), QMIN(16, src.length())).string().latin1());#endif        if (src->unicode() == '>' &&            ( ( brokenComments && !( script || style ) ) ||              ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&                scriptCode[scriptCodeSize-2] == '-' ) ||              // Other browsers will accept --!> as a close comment, even though it's              // not technically valid.              ( scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' &&                scriptCode[scriptCodeSize-3] == '-' &&                scriptCode[scriptCodeSize-2] == '!' ) ) ) {            ++src;            if ( !( script || xmp || textarea || style) ) {#ifdef COMMENTS_IN_DOM                checkScriptBuffer();                scriptCode[ scriptCodeSize ] = 0;                scriptCode[ scriptCodeSize + 1 ] = 0;                currToken.id = ID_COMMENT;                processListing(TokenizerString(scriptCode, scriptCodeSize - 2));                processToken();                currToken.id = ID_COMMENT + ID_CLOSE_TAG;                processToken();#endif                scriptCodeSize = 0;            }            comment = false;            return; // Finished parsing comment        }        ++src;    }}void HTMLTokenizer::parseServer(TokenizerString &src){    checkScriptBuffer(src.length());    while ( !src.isEmpty() ) {        scriptCode[ scriptCodeSize++ ] = *src;        if (src->unicode() == '>' &&            scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {            ++src;            server = false;            scriptCodeSize = 0;            return; // Finished parsing server include        }        ++src;    }}void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src){    char oldchar = 0;    while ( !src.isEmpty() )    {        unsigned char chbegin = src->latin1();        if(chbegin == '\'') {            tquote = tquote == SingleQuote ? NoQuote : SingleQuote;        }        else if(chbegin == '\"') {            tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;        }        // Look for '?>'        // some crappy sites omit the "?" before it, so        // we look for an unquoted '>' instead. (IE compatible)        else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )        {            // We got a '?>' sequence            processingInstruction = false;            ++src;            discard=LFDiscard;            return; // Finished parsing comment!        }        ++src;        oldchar = chbegin;    }}void HTMLTokenizer::parseText(TokenizerString &src){    while ( !src.isEmpty() )    {        // do we need to enlarge the buffer?        checkBuffer();        // ascii is okay because we only do ascii comparisons        unsigned char chbegin = src->latin1();        if (skipLF && ( chbegin != '\n' ))        {            skipLF = false;        }        if (skipLF)        {            skipLF = false;            ++src;        }        else if (( chbegin == '\n' ) || ( chbegin == '\r' ))        {            if (chbegin == '\r')                skipLF = true;            *dest++ = '\n';            ++src;        }        else {            *dest = *src;            fixUpChar(*dest);            ++dest;            ++src;        }    }}void HTMLTokenizer::parseEntity(TokenizerString &src, QChar *&dest, bool start){    if( start )    {        cBufferPos = 0;        Entity = SearchEntity;        EntityUnicodeValue = 0;    }    while( !src.isEmpty() )    {        ushort cc = src->unicode();        switch(Entity) {        case NoEntity:            assert(Entity != NoEntity);            return;                case SearchEntity:            if(cc == '#') {                cBuffer[cBufferPos++] = cc;                ++src;                Entity = NumericSearch;            }            else                Entity = EntityName;            break;        case NumericSearch:            if(cc == 'x' || cc == 'X') {                cBuffer[cBufferPos++] = cc;                ++src;                Entity = Hexadecimal;            }            else if(cc >= '0' && cc <= '9')                Entity = Decimal;            else                Entity = SearchSemicolon;            break;        case Hexadecimal:        {            int ll = kMin(src.length(), 8);            while(ll--) {                QChar csrc(src->lower());                cc = csrc.cell();                if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {                    break;                }                EntityUnicodeValue = EntityUnicodeValue*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));                cBuffer[cBufferPos++] = cc;                ++src;            }            Entity = SearchSemicolon;            break;        }        case Decimal:        {            int ll = kMin(src.length(), 9-cBufferPos);            while(ll--) {                cc = src->cell();                if(src->row() || !(cc >= '0' && cc <= '9')) {                    Entity = SearchSemicolon;                    break;                }                EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');                cBuffer[cBufferPos++] = cc;                ++src;            }            if(cBufferPos == 9)  Entity = SearchSemicolon;            break;        }        case EntityName:        {            int ll = kMin(src.length(), 9-cBufferPos);            while(ll--) {                QChar csrc = *src;                cc = csrc.cell();                if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||                                   (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {                    Entity = SearchSemicolon;                    break;                }                cBuffer[cBufferPos++] = cc;                ++src;            }            if(cBufferPos == 9) Entity = SearchSemicolon;            if(Entity == SearchSemicolon) {                if(cBufferPos > 1) {                    const entity *e = findEntity(cBuffer, cBufferPos);                    if(e)                        EntityUnicodeValue = e->code;                    // be IE compatible                    if(tag && EntityUnicodeValue > 255 && *src != ';')                        EntityUnicodeValue = 0;                }            }            else                break;        }        case SearchSemicolon:            //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl;            // Don't allow surrogate code points, or values that are more than 21 bits.            if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800)                    || (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) {                            if (*src == ';')                    ++src;                if (EntityUnicodeValue <= 0xFFFF) {                    QChar c(EntityUnicodeValue);                    fixUpChar(c);                    checkBuffer();                    src.push(c);                } else {                    // Convert to UTF-16, using surrogate code points.                    QChar c1(0xD800 | (((EntityUnicodeValue >> 16) - 1) << 6) | ((EntityUnicodeValue >> 10) & 0x3F));                    QChar c2(0xDC00 | (EntityUnicodeValue & 0x3FF));                    checkBuffer(2);                    src.push(c1);                    src.push(c2);                }            } else {#ifdef TOKEN_DEBUG                kdDebug( 6036 ) << "unknown entity!" << endl;#endif                checkBuffer(10);                // ignore the sequence, add it to the buffer as plaintext                *dest++ = '&';                for(unsigned int i = 0; i < cBufferPos; i++)                    dest[i] = cBuffer[i];                dest += cBufferPos;                if (pre)                    prePos += cBufferPos+1;            }            Entity = NoEntity;            return;        }    }}void HTMLTokenizer::parseTag(TokenizerString &src){    assert(!Entity );    while ( !src.isEmpty() )    {        checkBuffer();#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1        uint l = 0;        while(l < src.length() && (*(src.current()+l)).latin1() != '>')            l++;        qDebug("src is now: *%s*, tquote: %d",               QConstString((QChar*)src.current(), l).string().latin1(), tquote);#endif        switch(tag) {        case NoTag:        {            return;        }        case TagName:        {#if defined(TOKEN_DEBUG) &&  TOKEN_DEBUG > 1            qDebug("TagName");#endif            if (searchCount > 0)            {                if (*src == commentStart[searchCount])                {                    searchCount++;                    if (searchCount == 4)                    {#ifdef TOKEN_DEBUG                        kdDebug( 6036 ) << "Found comment" << endl;#endif                        // Found '<!--' sequence                        ++src;                        dest = buffer; // ignore the previous part of this tag                        comment = true;                        tag = NoTag;                        // Fix bug 34302 at kde.bugs.org.  Go ahead and treat                        // <!--> as a valid comment, since both mozilla and IE on windows                        // can handle this case.  Only do this in quirks mode. -dwh                        if (!src.isEmpty() && *src == '>' && parser->doc()->inCompatMode()) {                          comment = false;                          ++src;                          if (!src.isEmpty())                              cBuffer[cBufferPos++] = src->cell();                        }		        else                          parseComment(src);                        return; // Finished parsing tag!                    }                    // cuts of high part, is okay                    cBuffer[cBufferPos++] = src->cell();                    ++src;                    break;                }                else                    searchCount = 0; // Stop looking for '<!--' sequence            }            bool finish = false;            unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos);            while(ll--) {                ushort curchar = *src;                if(curchar <= ' ' || curchar == '>' ) {                    finish = true;                    break;                }                // Use tolower() instead of | 0x20 to lowercase the char because there is no                 // performance gain in using | 0x20 since tolower() is optimized and                 // | 0x20 turns characters such as '_' into junk.                cBuffer[cBufferPos++] = tolower(curchar);                ++src;            }            // Disadvantage: we add the possible rest of the tag            // as attribute names. ### judge if this causes problems            if(finish || CBUFLEN == cBufferPos) {                bool beginTag;                char* ptr = cBuffer;                unsigned int len = cBufferPos;                cBuffer[cBufferPos] = '\0';                if ((cBufferPos > 0) && (*ptr == '/'))                {                    // End Tag                    beginTag = false;                    ptr++;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -