📄 htmltokenizer.cpp

📁 monqueror一个很具有参考价值的源码
💻 CPP
📖 第 1 页 / 共 4 页
字号:
                       kdDebug( 6036 ) << "Known attribute: \"" << attrName << "\"" << endl;#endif                    }                    tag = SearchEqual;                    discard = SpaceDiscard; // discard spaces before '='                }                break;            }            case SearchEqual:            {                if( tquote )                {#ifdef TOKEN_DEBUG                      kdDebug( 6036 ) << "bad HTML in parseTag: SearchEqual" << endl;#endif                      // this is moslty due to a missing '"' somewhere before..                      // so let's start searching for a new tag                      tquote = NoQuote;                      Attribute a;                      a.id = *buffer;                      if(a.id==0) a.setName( attrName );                      a.setValue(0, 0);                      currToken->attrs.add(a);                      dest = buffer;                      tag = SearchAttribute;                      discard = SpaceDiscard;                      pending = NonePending;                }                else if( curchar == '=' )                {#ifdef TOKEN_DEBUG                    kdDebug(6036) << "found equal" << endl;#endif                    tag = SearchValue;                    pending = NonePending; // ignore spaces before '='                    discard = SpaceDiscard; // discard spaces after '='                    ++src;                }                else if( curchar == '>' )                    tag = SearchEnd;                else // other chars indicate a new attribte                {                    Attribute a;                    a.id = *buffer;                    if(a.id==0) a.setName( attrName );                    a.setValue(0, 0);                    currToken->attrs.add(a);                    dest = buffer;                    tag = SearchAttribute;                    discard = SpaceDiscard;                    pending = NonePending;                }                break;            }            
case SearchValue:            {                if(tquote)                {                    tag = QuotedValue;                }                else                {                    tag = Value;                }                pending = NonePending;                break;            }            case QuotedValue:            {                // ### attributes like '&{blaa....};' are supposed to be treated as jscript.                if ( curchar == '&' )                {                    ++src;                    discard = NoneDiscard;                    if (pending)                        addPending();                    charEntity = true;                    parseEntity(src, dest, true);                    break;                }                else if ( !tquote )                {                    // end of attribute                    Attribute a;                    a.id = *buffer;                    if(a.id==0) a.setName( attrName );                    while(*(dest-1) == ' ' && dest>buffer+1) dest--; // remove trailing spaces                    a.setValue(buffer+1, dest-buffer-1);#ifdef TOKEN_DEBUG                    kdDebug() << "adding value: *" << QConstString(buffer+1, dest-buffer-1).string() << "*" << endl;#endif                    currToken->attrs.add(a);                    dest = buffer;                    tag = SearchAttribute;                    discard = SpaceDiscard;                    pending = NonePending;                    break;                }                if( pending ) addPending();                discard = NoneDiscard;                *dest++ = src[0];                ++src;                break;            }            case Value:            {                if( tquote )                {                  // additional quote. 
discard it, and define as end of                  // attribute#ifdef TOKEN_DEBUG                    kdDebug( 6036 ) << "bad HTML in parseTag: Value" << endl;#endif                    tquote = NoQuote;                }                // if discard==NoneDiscard at this point, it means                // that we passed an empty "" pair. bit hacky, but...                // helps with <tag attr=""otherattr="something">                if ( pending || src[0].latin1() == '>' || discard==NoneDiscard)                {                    // no quotes. Every space means end of value                    Attribute a;                    a.id = *buffer;                    if(a.id==0) a.setName( attrName );                    a.setValue(buffer+1, dest-buffer-1);#ifdef TOKEN_DEBUG                    kdDebug() << "adding value: *" << QConstString(buffer+1, dest-buffer-1).string() << "*" << endl;#endif                    currToken->attrs.add(a);                    dest = buffer;                    tag = SearchAttribute;                    discard = SpaceDiscard;                    pending = NonePending;                    break;                }                *dest++ = src[0];                ++src;                break;            }            case SearchEnd:            {                if ( curchar != '>')                {                    ++src; // discard everything, until we found the end                    break;                }                searchCount = 0; // Stop looking for '<!--' sequence                tag = NoTag;                tquote = NoQuote;                pending = NonePending; // Ignore pending spaces                ++src;                if ( currToken->id == 0 ) //stop if tag is unknown                {                    discard = NoneDiscard;                    *dest = QChar::null;                    return;                }                if(dest>buffer)                {                    // add the last attribute                    Attribute a;             
       a.id = *buffer;                    if(a.id==0) a.setName( attrName );                    a.setValue(buffer+1, dest-buffer-1);                    currToken->attrs.add(a);                    dest = buffer;                }                uint tagID = currToken->id;#ifdef TOKEN_DEBUG                kdDebug( 6036 ) << "appending Tag: " << tagID << endl;#endif                bool beginTag = (tagID < ID_CLOSE_TAG);                if( beginTag && tagID != ID_IMG && tagID != ID_INPUT ) {                    // Ignore Space/LF's after a start tag                    discard = LFDiscard;                } else if (!beginTag) {                    // Don't ignore CR/LF's after a close tag                    discard = NoneDiscard;                    tagID -= ID_CLOSE_TAG;                }                if ( tagID == ID_SCRIPT  && beginTag ) {                    int attrIndex = currToken->attrs.find(ATTR_SRC);                    scriptSrc = (attrIndex == -1 ? (QString)"" : currToken->attrs[attrIndex]->value().string());                    attrIndex = currToken->attrs.find(ATTR_LANGUAGE);                    javascript = true;                    if( attrIndex != -1 ) {                        QString lang = currToken->attrs[attrIndex]->value().string();                        lang = lang.lower();                        if( !lang.contains("javascript") &&                            !lang.contains("ecmascript") &&                            !lang.contains("jscript") )                           javascript = false;                    } else {                        attrIndex = currToken->attrs.find(ATTR_TYPE);                        if( attrIndex != -1 ) {                            QString lang = currToken->attrs[attrIndex]->value().string();                            lang = lang.lower();                            if( !lang.contains("javascript") &&                                !lang.contains("ecmascript") &&                                !lang.contains("jscript") )          
                      javascript = false;                        }                    }                }                processToken();                if(pre)                {                    // we have to take care to close the pre block in                    // case we encounter an unallowed element....                    if(!DOM::checkChild(ID_PRE, tagID)) {                        //kdDebug(0) << " not allowed in <pre> " << (int)tagID << endl;                        pre = false;                    }                }                if ( tagID == ID_PRE )                {                    prePos = 0;                    pre = beginTag;                }                else if ( tagID == ID_TEXTAREA )                {                    if(beginTag) {                        listing = true;			textarea = true;                        searchCount = 0;                        searchFor = textareaEnd;                        scriptCode = QT_ALLOC_QCHAR_VEC( 1024 );                        scriptCodeSize = 0;                        scriptCodeMaxSize = 1024;                        parseListing(src);                    }                }                else if ( tagID == ID_SCRIPT )                {                    if (beginTag)                    {#ifdef TOKEN_DEBUG                        kdDebug( 6036 ) << "start of script, token->id = " << currToken->id << endl;#endif                        script = true;                        searchCount = 0;                        searchFor = scriptEnd;                        scriptCode = QT_ALLOC_QCHAR_VEC( 1024 );                        scriptCodeSize = 0;                        scriptCodeMaxSize = 1024;                        parseScript(src);#ifdef TOKEN_DEBUG                        kdDebug( 6036 ) << "end of script" << endl;#endif                    }                }                else if ( tagID == ID_STYLE )                {                    if (beginTag)                    {                        style = true;          
              searchCount = 0;                        searchFor = styleEnd;                        scriptCode = QT_ALLOC_QCHAR_VEC( 1024 );                        scriptCodeSize = 0;                        scriptCodeMaxSize = 1024;                        parseStyle(src);                    }                }                else if ( tagID == ID_LISTING )                {                    if (beginTag)                    {                        listing = true;                        searchCount = 0;                        searchFor = listingEnd;                        scriptCode = QT_ALLOC_QCHAR_VEC( 1024 );                        scriptCodeSize = 0;                        scriptCodeMaxSize = 1024;                        parseListing(src);                    }                }                else if ( tagID == ID_SELECT )                {                    select = beginTag;                }                return; // Finished parsing tag!            }            default:            {#ifdef TOKEN_DEBUG                kdDebug( 6036 ) << "error in parseTag! 
" << __LINE__ << endl;#endif                return;            }            } // end switch        }    }    return;}void HTMLTokenizer::addPending(){    if ( tag || select)    {        *dest++ = ' ';    }    else if ( textarea )    {        if (pending == LFPending)            *dest++ = '\n';        else            *dest++ = ' ';    }    else if ( pre )    {        int p;        switch (pending)        {        case SpacePending:            // Insert a non-breaking space            *dest++ = QChar(' ');            prePos++;            break;        case LFPending:            *dest = '\n';            dest++;            prePos = 0;            break;        case TabPending:            p = TAB_SIZE - ( prePos % TAB_SIZE );            for ( int x = 0; x < p; x++ )            {                *dest = QChar(' ');                dest++;            }            prePos += p;            break;        default:#ifdef TOKEN_DEBUG            kdDebug( 6036 ) << "Assertion failed: pending = " << (int) pending << endl;#endif            break;        }    }    else    {        *dest++ = ' ';    }    pending = NonePending;}void HTMLTokenizer::setPlainText(){    if (!plaintext)    {       // Do this only once!       plaintext = true;       currToken->id = ID_PLAIN;       processToken();       dest = buffer;    }}void HTMLTokenizer::write( const QString &str ){    // we have to make this function reentrant. This is needed, because some    // script code could call document.write(), which would add something here.#ifdef TOKEN_DEBUG    kdDebug( 6036 ) << "Tokenizer::write(\"" << str << "\")" << endl;#endif#if DEBUG_BY_XHTANG	fprintf(stderr,"HTMLTokenizer::write strEmpty:%d buffer:%p\n",str.isEmpty(),buffer);	fprintf(stderr,"HTMLTokenizer::write str:%s\n",(const char*)str);#endif	if ( str.isEmpty() || buffer == 0L )        return;    // reentrant...    // we just insert the code at the tokenizers current position. 
Parsing will continue once    // we return from the script stuff    // (this won't happen if we're in the middle of loading an external script)    if(executingScript) {#if DEBUG_BY_XHTANG	fprintf(stderr,"HTMLTokenizer::write executingScript\n");#endif#ifdef TOKEN_DEBUG        kdDebug( 6036 ) << "adding to scriptOutput" << endl;#endif        scriptOutput += str;        return;    }    if (loadingExtScript) {#if DEBUG_BY_XHTANG	fprintf(stderr,"HTMLTokenizer::write loadingExtScript\n");			#endif	        // don't parse; we will do this later        pendingSrc += str;        return;    }    _src = str;    src = DOMStringIt(_src);    if(!currToken) currToken = new Token;    if (plaintext)        parseText(src);    else if (comment)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -