📄 htmltokenizer.cpp
字号:
parseComment(src); else if (script) parseScript(src); else if (style) parseStyle(src); else if (listing) parseListing(src); else if (processingInstruction) parseProcessingInstruction(src); else if (tag) { parseTag(src); } else if (charEntity) parseEntity(src, dest); while ( src.length() ) { // do we need to enlarge the buffer? checkBuffer(); // doesn't hurt because we only do ASCII comparisons // use chbegin here instead of comparing equality with "src[0]" // because this is slow (two function calls) char chbegin = src[0].latin1(); if (skipLF && (chbegin != '\n')) skipLF = false; if (skipLF) { skipLF = false; ++src; } else if ( startTag ) { startTag = false; switch(chbegin) { case '/': { // Start of an End-Tag if(pending == LFPending) pending = NonePending; // Ignore leading Spaces/LFs break; } case '!': { // <!-- comment --> searchCount = 1; // Look for '<!--' sequence to start comment break; } case '?': { // xml processing instruction processingInstruction = true; parseProcessingInstruction(src); continue; break; } default: { if( ((chbegin >= 'a') && (chbegin <= 'z')) || ((chbegin >= 'A') && (chbegin <= 'Z'))) { // Start of a Start-Tag } else { // Invalid tag // Add as is if (pending) addPending(); *dest = '<'; dest++; *dest++ = src[0]; ++src; continue; } } }; // end case if(pending) addPending(); processToken(); tag = TagName; parseTag(src); } else if ( chbegin == '&' ) { ++src; discard = NoneDiscard; if (pending) addPending(); charEntity = true; parseEntity(src, dest, true); } else if ( chbegin == '<') { ++src; startTag = true; discard = NoneDiscard; } else if (( chbegin == '\n' ) || ( chbegin == '\r' )) { if ( pre || textarea) { if (discard == LFDiscard || discard == AllDiscard) { // Ignore this LF discard = NoneDiscard; // We have discarded 1 LF } else { // Process this LF if (pending) addPending(); pending = LFPending; } } else { if (discard == LFDiscard) { // Ignore this LF discard = NoneDiscard; // We have discarded 1 LF } else if(discard == AllDiscard) { } else { // Process this LF if (pending == NonePending) pending = LFPending; } } /* Check for MS-DOS CRLF sequence */ if (chbegin == '\r') { skipLF = true; } ++src; } else if (( chbegin == ' ' ) || ( chbegin == '\t' )) { if ( pre || textarea) { if (pending) addPending(); if (chbegin == ' ') pending = SpacePending; else pending = TabPending; } else { if(discard == SpaceDiscard) discard = NoneDiscard; else if(discard == AllDiscard) { } else pending = SpacePending; } ++src; } else { if (pending) addPending(); discard = NoneDiscard; if ( pre ) { prePos++; } unsigned char row = src[0].row(); if ( row > 0x05 && row < 0x10 || row > 0xfd ) currToken->complexText = true; *dest++ = src[0]; ++src; } } _src = QString(); if (noMoreData && !cachedScript) end(); // this actually causes us to be deleted#if DEBUG_BY_XHTANG fprintf(stderr,"HTMLTokenizer::write buffer:%p\n",buffer);#endif }void HTMLTokenizer::end(){#if DEBUG_BY_XHTANG fprintf(stderr,"HTMLTokenizer::end\n"); #endif if ( buffer == 0 ) { emit finishedParsing(); return; } if(currToken) processToken(); if(buffer) QT_DELETE_QCHAR_VEC(buffer); if(scriptCode) QT_DELETE_QCHAR_VEC(scriptCode); scriptCode = 0; buffer = 0; emit finishedParsing();}void HTMLTokenizer::finish(){#if 0 //DEBUG_BY_XHTANG QString xqs(scriptCode, scriptCodeSize); fprintf(stderr,"finished xqs:%s\n",xqs.latin1());#endif // do this as long as we don't find matching comment ends while(comment && scriptCode && scriptCodeSize > 0) { // we've found an unmatched comment start scriptCode[ scriptCodeSize ] = 0; scriptCode[ scriptCodeSize + 1 ] = 0; int pos = QConstString(scriptCode, scriptCodeSize).string().find('>'); QString food; food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy QT_DELETE_QCHAR_VEC(scriptCode); scriptCode = 0; script = style = listing = comment = textarea = false; scriptCodeSize = 0; write(food); } #if DEBUG_BY_XHTANG fprintf(stderr,"finished comment:%d scritpCode:%d scrSize:%d\n",comment,scriptCode,scriptCodeSize);#endif // this indicates we will not recieve any more data... but if we are waiting on // an external script to load, we can't finish parsing until that is done noMoreData = true; if (!loadingExtScript && !executingScript) end(); // this actually causes us to be deleted}void HTMLTokenizer::processToken(){ if ( dest > buffer ) {#ifdef TOKEN_DEBUG if(currToken->id && currToken->id != ID_COMMENT) kdDebug( 6036 ) << "Error in processToken!!!" << endl;#endif if ( currToken->complexText ) { // ### we do too much QString copying here, but better here than in RenderText... // anyway have to find a better solution in the long run (lars) QString s = QConstString(buffer, dest-buffer).string(); s.compose(); currToken->text = DOMString( s ); } else currToken->text = DOMString( buffer, dest - buffer ); if (currToken->id != ID_COMMENT) currToken->id = ID_TEXT; } else if(!currToken->id) return; dest = buffer;#ifdef TOKEN_PRINT QString name = getTagName(currToken->id).string(); QString text = currToken->text.string(); kdDebug( 6036 ) << "Token --> " << name << " id = " << currToken->id << endl; if(currToken->text != 0) kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;#else#ifdef TOKEN_DEBUG QString name = getTagName(currToken->id).string(); QString text = currToken->text.string(); kdDebug( 6036 ) << "Token --> " << name << " id = " << currToken->id << endl; if(currToken->text != 0) kdDebug( 6036 ) << "text: \"" << text << "\"" << endl; int l = currToken->attrs.length(); if(l>0) { int i = 0; kdDebug( 6036 ) << "Attributes: " << l << endl; while(i<l) { name = currToken->attrs.name(i).string(); text = currToken->attrs.value(i).string(); kdDebug( 6036 ) << " " << currToken->attrs.id(i) << " " << name << "=" << text << endl; i++; } } kdDebug( 6036 ) << endl;#endif#endif // pass the token over to the parser, the parser DOES NOT delete the token parser->parseToken(currToken); // ### FIXME: make this faster delete currToken; currToken = new Token;}HTMLTokenizer::~HTMLTokenizer(){ reset();}void HTMLTokenizer::enlargeBuffer(){ QChar *newbuf = QT_ALLOC_QCHAR_VEC( size*2 ); memcpy( newbuf, buffer, (dest - buffer + 1)*sizeof(QChar) ); dest = newbuf + ( dest - buffer ); QT_DELETE_QCHAR_VEC(buffer); buffer = newbuf; size *= 2;}//void HTMLTokenizer::notifyFinished(CachedObject *finishedObj)void HTMLTokenizer::notifyFinished(){#if 0 // following are origin codes if (finishedObj == cachedScript) {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "Finished loading an external script" << endl;#endif loadingExtScript = false; DOMString scriptSource = cachedScript->script();#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl;#endif cachedScript->deref(this); cachedScript = 0; // TODO /* executingScript = true; view->part()->executeScript(scriptSource.string()); executingScript = false; */ // 'script' is true when we are called synchronously from // parseScript(). In that case parseScript() will take care // of 'scriptOutput'. if (!script) { QString rest = scriptOutput+pendingSrc; scriptOutput = pendingSrc = ""; write(rest); } }#endif #ifdef JAVASCRIPT_ENABLE // following are added for javascript // javascript has not been loaded to local,so return. if(!cachedScript||(cachedScript&&!cachedScript->isDataOk()))return; loadingExtScript = false; DOMString scriptSource = cachedScript->script(); cachedScript->deref(this); cachedScript = 0; //fprintf(stderr,"HTMLTokenizer::notifyFinished"); executingScript = true; view->part()->executeScript(scriptSource.string()); executingScript = false; // 'script' is true when we are called synchronously from // parseScript(). In that case parseScript() will take care // of 'scriptOutput'. if (!script) { QString rest = scriptOutput+pendingSrc; scriptOutput = pendingSrc = ""; write(rest); }#endif }//added for loadingscriptbool HTMLTokenizer::CanFinishParsing(){#ifdef JAVASCRIPT_ENABLE return ((cachedScript&&cachedScript->isDataOk())||!cachedScript);#else return 1;#endif }void HTMLTokenizer::addScriptOutput(){ if ( !scriptOutput.isEmpty() ) {// kdDebug( 6036 ) << "adding scriptOutput to parsed string" << endl; QString newStr = scriptOutput; newStr += QString(src.current(), src.length()); _src = newStr; src = DOMStringIt(_src); scriptOutput = ""; }}#include "htmltokenizer.moc"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -