📄 htmltokenizer.cpp
            parseSpecial(src, false);
        else if (listing)
            parseSpecial(src, false);
        else if (textarea)
            parseSpecial(src, false);
        else if (comment)
            parseComment(src);
        else if (processingInstruction)
            parseProcessingInstruction(src);
        else if (tag)
            parseTag(src);
    }

    while ( src.length() )
    {
        // do we need to enlarge the buffer?
        checkBuffer();

        ushort cc = src->unicode();

        if (skipLF && (cc != '\n'))
            skipLF = false;

        if (skipLF)
        {
            skipLF = false;
            ++src;
        }
        else if ( Entity )
        {
            parseEntity( src, dest );
        }
        else if ( plaintext )
        {
            parseText( src );
        }
        else if ( startTag )
        {
            startTag = false;

            switch(cc) {
            case '/':
                break;
            case '!':
            {
                // <!-- comment -->
                searchCount = 1; // Look for '<!--' sequence to start comment
                break;
            }
            case '?':
            {
                // xml processing instruction
                processingInstruction = true;
                tquote = NoQuote;
                parseProcessingInstruction(src);
                continue;
                break;
            }
            default:
            {
                if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z')))
                {
                    // Start of a Start-Tag
                }
                else
                {
                    // Invalid tag
                    // Add as is
                    if (pending)
                        addPending();
                    *dest = '<';
                    dest++;
                    continue;
                }
            }
            }; // end case

            if ( pending ) {
                // pre context always gets its spaces/linefeeds
                if ( pre )
                    addPending();
                // only add in existing inline context or if
                // we just started one, i.e. we're about to insert real text
                else if ( !parser->selectMode() &&
                          ( !parser->noSpaces() || dest > buffer )) {
                    addPending();
                    discard = AllDiscard;
                }
                // just forget it
                else
                    pending = NonePending;
            }

            processToken();

            cBufferPos = 0;
            tag = TagName;
            parseTag(src);
        }
        else if ( cc == '&' && !src.escaped())
        {
            ++src;

            if ( pending )
                addPending();
            parseEntity(src, dest, true);
        }
        else if ( cc == '<' && !src.escaped())
        {
            ++src;
            startTag = true;
            discard = NoneDiscard;
        }
        else if (( cc == '\n' ) || ( cc == '\r' ))
        {
            if ( pre || textarea)
            {
                if (discard == LFDiscard || discard == AllDiscard)
                {
                    // Ignore this LF
                    discard = NoneDiscard; // We have discarded 1 LF
                }
                else
                {
                    // Process this LF
                    if (pending)
                        addPending();
                    pending = LFPending;
                }
            }
            else
            {
                if (discard == LFDiscard)
                {
                    // Ignore this LF
                    discard = NoneDiscard; // We have discarded 1 LF
                }
                else if(discard == AllDiscard)
                {
                }
                else
                {
                    // Process this LF
                    if (pending == NonePending)
                        pending = LFPending;
                }
            }
            /* Check for MS-DOS CRLF sequence */
            if (cc == '\r')
            {
                skipLF = true;
            }
            ++src;
        }
        else if (( cc == ' ' ) || ( cc == '\t' ))
        {
            if ( pre || textarea)
            {
                if (pending)
                    addPending();

                if (cc == ' ')
                    pending = SpacePending;
                else
                    pending = TabPending;
            }
            else
            {
                if(discard == SpaceDiscard)
                    discard = NoneDiscard;
                else if(discard == AllDiscard)
                {
                }
                else
                    pending = SpacePending;
            }
            ++src;
        }
        else
        {
            if (pending)
                addPending();
            discard = NoneDiscard;
            if ( pre )
            {
                prePos++;
            }

            unsigned char row = src->row();
            if ( row > 0x05 && row < 0x10 || row > 0xfd )
                currToken.complexText = true;
            *dest = *src;
            fixUpChar( *dest );
            ++dest;
            ++src;
        }
    }

    _src = QString();

    if (noMoreData && !loadingExtScript && !m_executingScript )
        end(); // this actually causes us to be deleted
}

void HTMLTokenizer::end()
{
    if ( buffer == 0 ) {
        emit finishedParsing();
        return;
    }

    // parseTag is using the buffer for different matters
    if ( !tag )
        processToken();

    if(buffer)
        KHTML_DELETE_QCHAR_VEC(buffer);

    if(scriptCode)
        KHTML_DELETE_QCHAR_VEC(scriptCode);

    scriptCode = 0;
    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;

    buffer = 0;
    emit finishedParsing();
}

void HTMLTokenizer::finish()
{
    // do this as long as we don't find matching comment ends
    while(comment && scriptCode && scriptCodeSize)
    {
        // we've found an unmatched comment start
        brokenComments = true;
        checkScriptBuffer();
        scriptCode[ scriptCodeSize ] = 0;
        scriptCode[ scriptCodeSize + 1 ] = 0;
        int pos;
        QString food;
        if ( script || style || textarea || listing )
        {
            pos = QConstString( scriptCode, scriptCodeSize ).string().find( searchStopper, 0, false );
            if ( pos >= 0 )
                food.setUnicode( scriptCode+pos, scriptCodeSize-pos ); // deep copy
        }
        else
        {
            pos = QConstString(scriptCode, scriptCodeSize).string().find('>');
            food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy
        }
        KHTML_DELETE_QCHAR_VEC(scriptCode);
        scriptCode = 0;
        scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;

        comment = false;
        if ( !food.isEmpty() )
            write(food, true);
    }
    // this indicates we will not receive any more data... but if we are waiting on
    // an external script to load, we can't finish parsing until that is done
    noMoreData = true;
    if (!loadingExtScript && !m_executingScript && !onHold)
        end(); // this actually causes us to be deleted
}

void HTMLTokenizer::processToken()
{
    if ( dest > buffer )
    {
#ifdef TOKEN_DEBUG
        if(currToken.id && currToken.id != ID_COMMENT) {
            qDebug( "unexpected token id: %d, str: *%s*", currToken.id, QConstString( buffer, dest-buffer ).string().latin1() );
            assert(0);
        }
#endif
        if ( currToken.complexText ) {
            // ### we do too much QString copying here, but better here than in RenderText...
            // anyway have to find a better solution in the long run (lars)
            QString s = QConstString(buffer, dest-buffer).string();
            s.compose();
            currToken.text = new DOMStringImpl( s.unicode(), s.length() );
            currToken.text->ref();
        } else {
            currToken.text = new DOMStringImpl( buffer, dest - buffer );
            currToken.text->ref();
        }
        if (currToken.id != ID_COMMENT)
            currToken.id = ID_TEXT;
    }
    else if(!currToken.id)
    {
        currToken.reset();
        return;
    }

    dest = buffer;

#ifdef TOKEN_DEBUG
    QString name = getTagName(currToken.id).string();
    QString text;
    if(currToken.text)
        text = QConstString(currToken.text->s, currToken.text->l).string();

    kdDebug( 6036 ) << "Token --> " << name << " id = " << currToken.id << endl;
    if(!text.isNull())
        kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;
    int l = currToken.attrs ? currToken.attrs->length() : 0;
    if(l>0)
    {
        int i = 0;
        kdDebug( 6036 ) << "Attributes: " << l << endl;
        while(i<l) {
            AttrImpl* c = static_cast<AttrImpl*>(currToken.attrs->item(i));
            kdDebug( 6036 ) << " " << c->attrId << " " << c->name().string()
                            << "=\"" << c->value().string() << "\"" << endl;
            i++;
        }
    }
    kdDebug( 6036 ) << endl;
#endif
    // pass the token over to the parser, the parser DOES NOT delete the token
    parser->parseToken(&currToken);

    currToken.reset();
}

HTMLTokenizer::~HTMLTokenizer()
{
    reset();
    delete parser;
}

void HTMLTokenizer::enlargeBuffer(int len)
{
    int newsize = kMax(size*2, size+len);
    int oldoffs = (dest - buffer);

    buffer = (QChar*)realloc(buffer, newsize*sizeof(QChar));
    dest = buffer + oldoffs;
    size = newsize;
}

void HTMLTokenizer::enlargeScriptBuffer(int len)
{
    int newsize = kMax(scriptCodeMaxSize*2, scriptCodeMaxSize+len);
    scriptCode = (QChar*)realloc(scriptCode, newsize*sizeof(QChar));
    scriptCodeMaxSize = newsize;
}

void HTMLTokenizer::notifyFinished(CachedObject *finishedObj)
{
    if (view && finishedObj == cachedScript) {
#ifdef TOKEN_DEBUG
        kdDebug( 6036 ) << "Finished loading an external script" << endl;
#endif
        loadingExtScript = false;

        DOMString scriptSource = cachedScript->script();
#ifdef TOKEN_DEBUG
        kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl;
#endif
        cachedScript->deref(this);
        cachedScript = 0;

//        pendingSrc.prepend( QString( src.current(), src.length() ) ); // deep copy - again
        _src = QString::null;
        src = DOMStringIt( _src );

        scriptExecution( scriptSource.string() );

        // 'script' is true when we are called synchronously from
        // parseScript(). In that case parseScript() will take care
        // of 'scriptOutput'.
        if ( !script ) {
            QString rest = pendingSrc;
            pendingSrc = "";
            write(rest, false);
        }
    }
}

void HTMLTokenizer::addPendingSource()
{
//    kdDebug( 6036 ) << "adding pending Output to parsed string" << endl;
    QString newStr = QString(src.current(), src.length());
    newStr += pendingSrc;
    _src = newStr;
    src = DOMStringIt(_src);
    pendingSrc = "";
}

void HTMLTokenizer::setOnHold(bool _onHold)
{
    if (onHold == _onHold) return;
    onHold = _onHold;
    if (!onHold)
        write( _src, true );
}

#include "htmltokenizer.moc"
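A short aside on the CR/LF handling in the write() loop above: a '\r' sets skipLF so that an immediately following '\n' is swallowed, collapsing MS-DOS "\r\n" pairs (and lone '\r') into a single pending line feed. The snippet below is a minimal standalone sketch of that idea only, assuming plain std::string input; the function name normalizeNewlines is hypothetical, and the real tokenizer instead drives the pending/discard state on its QChar stream as shown above.

#include <string>

// Illustrative sketch (not KHTML code): collapse "\r\n" and lone "\r" to "\n",
// using the same skipLF idea as the main loop of HTMLTokenizer::write().
static std::string normalizeNewlines(const std::string& in)
{
    std::string out;
    bool skipLF = false;
    for (char c : in) {
        if (skipLF && c != '\n')
            skipLF = false;     // the '\r' was not followed by '\n'
        if (skipLF) {
            skipLF = false;     // swallow the '\n' of a "\r\n" pair
            continue;
        }
        if (c == '\r') {
            out += '\n';        // emit a single LF for the CR...
            skipLF = true;      // ...and skip an immediately following LF
        } else {
            out += c;
        }
    }
    return out;
}

// e.g. normalizeNewlines("a\r\nb\rc") yields "a\nb\nc".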