📄 htmltokenizer.cpp

📁 monqueror：一个很具有参考价值的源码
💻 CPP
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 3 4
        parseComment(src);    else if (script)        parseScript(src);    else if (style)        parseStyle(src);    else if (listing)        parseListing(src);    else if (processingInstruction)        parseProcessingInstruction(src);    else if (tag)    {        parseTag(src);    }    else if (charEntity)        parseEntity(src, dest);    while ( src.length() )    {        // do we need to enlarge the buffer?        checkBuffer();        // doesn't hurt because we only do ASCII comparisons        // use chbegin here instead of comparing equality with "src[0]"        // because this is slow (two function calls)        char chbegin = src[0].latin1();        if (skipLF && (chbegin != '\n'))            skipLF = false;        if (skipLF)        {            skipLF = false;            ++src;        }        else if ( startTag )        {            startTag = false;            switch(chbegin) {            case '/':            {                // Start of an End-Tag                if(pending == LFPending)                    pending = NonePending; // Ignore leading Spaces/LFs                break;            }            case '!':            {                // <!-- comment -->                searchCount = 1; // Look for '<!--' sequence to start comment                break;            }            case '?':            {                // xml processing instruction                processingInstruction = true;                parseProcessingInstruction(src);                continue;                break;            }            default:            {                if( ((chbegin >= 'a') && (chbegin <= 'z')) || ((chbegin >= 'A') && (chbegin <= 'Z')))                {                    // Start of a Start-Tag                }                else                {                    // Invalid tag                    // Add as is                    if (pending)                        addPending();                    *dest = '<';                    dest++;                    
*dest++ = src[0];                    ++src;                    continue;                }            }            }; // end case            if(pending) addPending();            processToken();            tag = TagName;            parseTag(src);        }        else if ( chbegin == '&' )        {            ++src;            discard = NoneDiscard;            if (pending)                addPending();            charEntity = true;            parseEntity(src, dest, true);        }        else if ( chbegin == '<')        {            ++src;            startTag = true;            discard = NoneDiscard;        }        else if (( chbegin == '\n' ) || ( chbegin == '\r' ))        {            if ( pre || textarea)            {                if (discard == LFDiscard || discard == AllDiscard)                {                    // Ignore this LF                    discard = NoneDiscard; // We have discarded 1 LF                }                else                {                    // Process this LF                    if (pending)                        addPending();                    pending = LFPending;                }            }            else            {                if (discard == LFDiscard)                {                    // Ignore this LF                    discard = NoneDiscard; // We have discarded 1 LF                }                else if(discard == AllDiscard)                {                }                else                {                    // Process this LF                    if (pending == NonePending)                        pending = LFPending;                }            }            /* Check for MS-DOS CRLF sequence */            if (chbegin == '\r')            {                skipLF = true;            }            ++src;        }        else if (( chbegin == ' ' ) || ( chbegin == '\t' ))        {            if ( pre || textarea)            {                if (pending)                    addPending();                if (chbegin == 
' ')                    pending = SpacePending;                else                    pending = TabPending;            }            else            {                if(discard == SpaceDiscard)                    discard = NoneDiscard;                else if(discard == AllDiscard)                { }                else                    pending = SpacePending;            }            ++src;        }        else        {            if (pending)                addPending();            discard = NoneDiscard;            if ( pre )            {                prePos++;            }            unsigned char row = src[0].row();            if ( row > 0x05 && row < 0x10 || row > 0xfd )                    currToken->complexText = true;            *dest++ = src[0];            ++src;        }    }    _src = QString();    if (noMoreData && !cachedScript)        end(); // this actually causes us to be deleted#if DEBUG_BY_XHTANG	fprintf(stderr,"HTMLTokenizer::write buffer:%p\n",buffer);#endif	}void HTMLTokenizer::end(){#if DEBUG_BY_XHTANG		fprintf(stderr,"HTMLTokenizer::end\n");		#endif    if ( buffer == 0 ) {        emit finishedParsing();        return;    }    if(currToken) processToken();    if(buffer)        QT_DELETE_QCHAR_VEC(buffer);    if(scriptCode)        QT_DELETE_QCHAR_VEC(scriptCode);    scriptCode = 0;    buffer = 0;    emit finishedParsing();}void HTMLTokenizer::finish(){#if 0 //DEBUG_BY_XHTANG		QString xqs(scriptCode, scriptCodeSize);		fprintf(stderr,"finished xqs:%s\n",xqs.latin1());#endif							// do this as long as we don't find matching comment ends    while(comment && scriptCode && scriptCodeSize > 0)    {        // we've found an unmatched comment start        scriptCode[ scriptCodeSize ] = 0;        scriptCode[ scriptCodeSize + 1 ] = 0;        int pos = QConstString(scriptCode, scriptCodeSize).string().find('>');        QString food;        food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy        QT_DELETE_QCHAR_VEC(scriptCode);        
scriptCode = 0;        script = style = listing = comment = textarea = false;        scriptCodeSize = 0;        write(food);    }	#if DEBUG_BY_XHTANG	fprintf(stderr,"finished comment:%d scritpCode:%d scrSize:%d\n",comment,scriptCode,scriptCodeSize);#endif					    // this indicates we will not recieve any more data... but if we are waiting on    // an external script to load, we can't finish parsing until that is done    noMoreData = true;    if (!loadingExtScript && !executingScript)        end(); // this actually causes us to be deleted}void HTMLTokenizer::processToken(){    if ( dest > buffer )    {#ifdef TOKEN_DEBUG        if(currToken->id && currToken->id != ID_COMMENT)            kdDebug( 6036 ) << "Error in processToken!!!" << endl;#endif        if ( currToken->complexText ) {            // ### we do too much QString copying here, but better here than in RenderText...            // anyway have to find a better solution in the long run (lars)            QString s = QConstString(buffer, dest-buffer).string();            s.compose();            currToken->text = DOMString( s );        } else            currToken->text = DOMString( buffer, dest - buffer );        if (currToken->id != ID_COMMENT)            currToken->id = ID_TEXT;    }    else if(!currToken->id)        return;    dest = buffer;#ifdef TOKEN_PRINT    QString name = getTagName(currToken->id).string();    QString text = currToken->text.string();    kdDebug( 6036 ) << "Token --> " << name << "   id = " << currToken->id << endl;    if(currToken->text != 0)        kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;#else#ifdef TOKEN_DEBUG    QString name = getTagName(currToken->id).string();    QString text = currToken->text.string();    kdDebug( 6036 ) << "Token --> " << name << "   id = " << currToken->id << endl;    if(currToken->text != 0)        kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;    int l = currToken->attrs.length();    if(l>0)    {        int i = 0;        kdDebug( 6036 ) << 
"Attributes: " << l << endl;        while(i<l)        {            name = currToken->attrs.name(i).string();            text = currToken->attrs.value(i).string();            kdDebug( 6036 ) << "    " << currToken->attrs.id(i) << " " << name << "=" << text << endl;            i++;        }    }    kdDebug( 6036 ) << endl;#endif#endif    // pass the token over to the parser, the parser DOES NOT delete the token    parser->parseToken(currToken);    // ### FIXME: make this faster    delete currToken;    currToken = new Token;}HTMLTokenizer::~HTMLTokenizer(){    reset();}void HTMLTokenizer::enlargeBuffer(){    QChar *newbuf = QT_ALLOC_QCHAR_VEC( size*2 );    memcpy( newbuf, buffer, (dest - buffer + 1)*sizeof(QChar) );    dest = newbuf + ( dest - buffer );    QT_DELETE_QCHAR_VEC(buffer);    buffer = newbuf;    size *= 2;}//void HTMLTokenizer::notifyFinished(CachedObject *finishedObj)void HTMLTokenizer::notifyFinished(){#if 0 // following are origin codes    if (finishedObj == cachedScript) {#ifdef TOKEN_DEBUG        kdDebug( 6036 ) << "Finished loading an external script" << endl;#endif        loadingExtScript = false;        DOMString scriptSource = cachedScript->script();#ifdef TOKEN_DEBUG        kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl;#endif        cachedScript->deref(this);        cachedScript = 0;				// TODO		/*        executingScript = true;        view->part()->executeScript(scriptSource.string());        executingScript = false;		*/        // 'script' is true when we are called synchronously from        // parseScript(). In that case parseScript() will take care        // of 'scriptOutput'.        if (!script)        {           QString rest = scriptOutput+pendingSrc;           scriptOutput = pendingSrc = "";           write(rest);        }    }#endif	#ifdef JAVASCRIPT_ENABLE		// following are added for javascript		// javascript has not been loaded to local,so return.	
if(!cachedScript||(cachedScript&&!cachedScript->isDataOk()))return;		loadingExtScript = false;	DOMString scriptSource = cachedScript->script();	cachedScript->deref(this);	cachedScript = 0;	//fprintf(stderr,"HTMLTokenizer::notifyFinished");	executingScript = true;    view->part()->executeScript(scriptSource.string());    executingScript = false;    // 'script' is true when we are called synchronously from    // parseScript(). In that case parseScript() will take care    // of 'scriptOutput'.    if (!script)		{		QString rest = scriptOutput+pendingSrc;		scriptOutput = pendingSrc = "";		write(rest);        }#endif					}//added for loadingscriptbool HTMLTokenizer::CanFinishParsing(){#ifdef JAVASCRIPT_ENABLE	return ((cachedScript&&cachedScript->isDataOk())||!cachedScript);#else	return 1;#endif	}void HTMLTokenizer::addScriptOutput(){    if ( !scriptOutput.isEmpty() ) {//      kdDebug( 6036 ) << "adding scriptOutput to parsed string" << endl;        QString newStr = scriptOutput;        newStr += QString(src.current(), src.length());        _src = newStr;        src = DOMStringIt(_src);        scriptOutput = "";    }}#include "htmltokenizer.moc"
上一页 1 2 3 4
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -