⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmltokenizer.cpp

📁 konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版本源码包.
💻 CPP
📖 第 1 页 / 共 4 页
字号:
    m_executingScript--;    script = oldscript;}void HTMLTokenizer::parseComment(TokenizerString &src){    // SGML strict    bool strict = parser->doc()->inStrictMode() && parser->doc()->htmlMode() != DocumentImpl::XHtml && !script && !style;    int delimiterCount = 0;    bool canClose = false;    checkScriptBuffer(src.length());    while ( src.length() ) {        scriptCode[ scriptCodeSize++ ] = *src;#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1        qDebug("comment is now: *%s*", src.toString().left(16).latin1());#endif        if (strict)        {            if (src->unicode() == '-') {                delimiterCount++;                if (delimiterCount == 2) {                    delimiterCount = 0;                    canClose = !canClose;                }            }            else                delimiterCount = 0;        }        if ((!strict || canClose) && src->unicode() == '>')        {            bool handleBrokenComments =  brokenComments && !( script || style );            bool scriptEnd=false;            if (!strict)            {                if ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&                     scriptCode[scriptCodeSize-2] == '-' )                    scriptEnd=true;            }            if (canClose || handleBrokenComments || scriptEnd ){                ++src;                if ( !( script || xmp || textarea || style) ) {#ifdef COMMENTS_IN_DOM                    checkScriptBuffer();                    scriptCode[ scriptCodeSize ] = 0;                    scriptCode[ scriptCodeSize + 1 ] = 0;                    currToken.tid = ID_COMMENT;                    processListing(DOMStringIt(scriptCode, scriptCodeSize - 2));                    processToken();                    currToken.tid = ID_COMMENT + ID_CLOSE_TAG;                    processToken();#endif                    scriptCodeSize = 0;                }                comment = false;                return; // Finished parsing comment            }        }        ++src;    }}void HTMLTokenizer::parseServer(TokenizerString &src){    checkScriptBuffer(src.length());    while ( !src.isEmpty() ) {        scriptCode[ scriptCodeSize++ ] = *src;        if (src->unicode() == '>' &&            scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {            ++src;            server = false;            scriptCodeSize = 0;            return; // Finished parsing server include        }        ++src;    }}void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src){    char oldchar = 0;    while ( !src.isEmpty() )    {        unsigned char chbegin = src->latin1();        if(chbegin == '\'') {            tquote = tquote == SingleQuote ? NoQuote : SingleQuote;        }        else if(chbegin == '\"') {            tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;        }        // Look for '?>'        // some crappy sites omit the "?" before it, so        // we look for an unquoted '>' instead. (IE compatible)        else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )        {            // We got a '?>' sequence            processingInstruction = false;            ++src;            discard=LFDiscard;            return; // Finished parsing comment!        }        ++src;        oldchar = chbegin;    }}void HTMLTokenizer::parseText(TokenizerString &src){    while ( !src.isEmpty() )    {        // do we need to enlarge the buffer?        checkBuffer();        // ascii is okay because we only do ascii comparisons        unsigned char chbegin = src->latin1();        if (skipLF && ( chbegin != '\n' ))        {            skipLF = false;        }        if (skipLF)        {            skipLF = false;            ++src;        }        else if (( chbegin == '\n' ) || ( chbegin == '\r' ))        {            if (chbegin == '\r')                skipLF = true;            *dest++ = '\n';            ++src;        }        else {            *dest++ = *src;            ++src;        }    }}void HTMLTokenizer::parseEntity(TokenizerString &src, QChar *&dest, bool start){    if( start )    {        cBufferPos = 0;        Entity = SearchEntity;    }    while( !src.isEmpty() )    {        ushort cc = src->unicode();        switch(Entity) {        case NoEntity:            return;            break;        case SearchEntity:            if(cc == '#') {                cBuffer[cBufferPos++] = cc;                ++src;                Entity = NumericSearch;            }            else                Entity = EntityName;            break;        case NumericSearch:            if(cc == 'x' || cc == 'X') {                cBuffer[cBufferPos++] = cc;                ++src;                Entity = Hexadecimal;            }            else if(cc >= '0' && cc <= '9')                Entity = Decimal;            else                Entity = SearchSemicolon;            break;        case Hexadecimal:        {            int uc = EntityChar.unicode();            int ll = kMin<uint>(src.length(), 8);            while(ll--) {                QChar csrc(src->lower());                cc = csrc.cell();                if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {                    break;                }                uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));                cBuffer[cBufferPos++] = cc;                ++src;            }            EntityChar = QChar(uc);            Entity = SearchSemicolon;            break;        }        case Decimal:        {            int uc = EntityChar.unicode();            int ll = kMin(src.length(), 9-cBufferPos);            while(ll--) {                cc = src->cell();                if(src->row() || !(cc >= '0' && cc <= '9')) {                    Entity = SearchSemicolon;                    break;                }                uc = uc * 10 + (cc - '0');                cBuffer[cBufferPos++] = cc;                ++src;            }            EntityChar = QChar(uc);            if(cBufferPos == 9)  Entity = SearchSemicolon;            break;        }        case EntityName:        {            int ll = kMin(src.length(), 9-cBufferPos);            while(ll--) {                QChar csrc = *src;                cc = csrc.cell();                if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||                                   (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {                    Entity = SearchSemicolon;                    break;                }                cBuffer[cBufferPos++] = cc;                ++src;                // be IE compatible and interpret even unterminated entities                // outside tags. like "foo &nbspstuff bla".                if ( tag == NoTag ) {                    const entity* e = kde_findEntity(cBuffer, cBufferPos);                    if ( e && e->code < 256 ) {                        Entity = SearchSemicolon;                        break;                    }                }            }            if(cBufferPos == 9) Entity = SearchSemicolon;            if(Entity == SearchSemicolon) {                if(cBufferPos > 1) {                    const entity *e = kde_findEntity(cBuffer, cBufferPos);                    if(e && ( e->code < 256 || *src == ';' ))                        EntityChar = e->code;                }            }            break;        }        case SearchSemicolon:#ifdef TOKEN_DEBUG            kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << endl;#endif            fixUpChar(EntityChar);            if (*src == ';')                    ++src;            if ( !EntityChar.isNull() ) {                checkBuffer();                // Just insert it                src.push( EntityChar );            } else {#ifdef TOKEN_DEBUG                kdDebug( 6036 ) << "unknown entity!" << endl;#endif                checkBuffer(10);                // ignore the sequence, add it to the buffer as plaintext                *dest++ = '&';                for(unsigned int i = 0; i < cBufferPos; i++)                    dest[i] = cBuffer[i];                dest += cBufferPos;                Entity = NoEntity;                if (pre)                    prePos += cBufferPos+1;            }            Entity = NoEntity;            EntityChar = QChar::null;            return;        };    }}void HTMLTokenizer::parseTag(TokenizerString &src){    assert(!Entity );    checkScriptBuffer( src.length() );    while ( !src.isEmpty() )    {        checkBuffer();#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1        uint l = 0;        while(l < src.length() && (src.toString()[l]).latin1() != '>')            l++;        qDebug("src is now: *%s*, tquote: %d",               src.toString().left(l).latin1(), tquote);#endif        switch(tag) {        case NoTag:            return;        case TagName:        {#if defined(TOKEN_DEBUG) &&  TOKEN_DEBUG > 1            qDebug("TagName");#endif            if (searchCount > 0)            {                if (*src == commentStart[searchCount])                {                    searchCount++;                    if (searchCount == 4)                    {#ifdef TOKEN_DEBUG                        kdDebug( 6036 ) << "Found comment" << endl;#endif                        // Found '<!--' sequence                        ++src;                        dest = buffer; // ignore the previous part of this tag                        tag = NoTag;                        comment = true;                        parseComment(src);                        return; // Finished parsing tag!                    }                    // cuts of high part, is okay                    cBuffer[cBufferPos++] = src->cell();                    ++src;                    break;                }                else                    searchCount = 0; // Stop looking for '<!--' sequence            }            bool finish = false;            unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos);            while(ll--) {                ushort curchar = *src;                if(curchar <= ' ' || curchar == '>' ) {                    finish = true;                    break;                }                // this is a nasty performance trick. will work for the A-Z                // characters, but not for others. if it contains one,                // we fail anyway                char cc = curchar;                cBuffer[cBufferPos++] = cc | 0x20;                ++src;            }            // Disadvantage: we add the possible rest of the tag            // as attribute names. ### judge if this causes problems            if(finish || CBUFLEN == cBufferPos) {                bool beginTag;                char* ptr = cBuffer;                unsigned int len = cBufferPos;                cBuffer[cBufferPos] = '\0';                if ((cBufferPos > 0) && (*ptr == '/'))                {                    // End Tag                    beginTag = false;                    ptr++;                    len--;                }                else                    // Start Tag                    beginTag = true;                // Accept empty xml tags like <br/>                if(len > 1 && ptr[len-1] == '/' ) {                    ptr[--len] = '\0';                    // if its like <br/> and not like <input/ value=foo>, take it as flat                    if (*src == '>')                        currToken.flat = true;                }                uint tagID = khtml::getTagID(ptr, len);                if (!tagID) {#ifdef TOKEN_DEBUG                    QCString tmp(ptr, len+1);                    kdDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"" << endl;#endif                    dest = buffer;                }                else                {#ifdef TOKEN_DEBUG                    QCString tmp(ptr, len+1);                    kdDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data() << endl;#endif                    currToken.tid = beginTag ? tagID : tagID + ID_CLOSE_TAG;                    dest = buffer;                }                tag = SearchAttribute;                cBufferPos = 0;            }            break;        }        case SearchAttribute:        {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1                qDebug("SearchAttribute");#endif            bool atespace = false;            ushort curchar;            while(!src.isEmpty()) {                curchar = *src;                if(curchar > ' ') {                    if(curchar == '<' || curchar == '>')                        tag = SearchEnd;                    else if(atespace && (curchar == '\'' || curchar == '"'))                    {                        tag = SearchValue;                        *dest++ = 0;                        attrName = QString::null;                    }                    else                        tag = AttributeName;                    cBufferPos = 0;                    break;                }                atespace = true;                ++src;            }            break;        }        case AttributeName:        {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -