📄 htmltokenizer.cpp
字号:
scriptCodeSize = scriptCodeDest-scriptCode; } else { scriptCode[scriptCodeSize] = *src; fixUpChar(scriptCode[scriptCodeSize]); ++scriptCodeSize; ++src; } }}void HTMLTokenizer::scriptHandler(){ // We are inside a <script> bool doScriptExec = false; CachedScript* cs = 0; // don't load external scripts for standalone documents (for now) if (!scriptSrc.isEmpty() && parser->doc()->part()) { // forget what we just got; load from src url instead if ( !parser->skipMode() ) { if ( (cs = parser->doc()->docLoader()->requestScript(scriptSrc, scriptSrcCharset) )) cachedScript.enqueue(cs); } scriptSrc=QString::null; } else {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "---START SCRIPT---" << endl; kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl; kdDebug( 6036 ) << "---END SCRIPT---" << endl;#endif // Parse scriptCode containing <script> info doScriptExec = true; } processListing(TokenizerString(scriptCode, scriptCodeSize)); QString exScript( buffer, dest-buffer ); processToken(); currToken.id = ID_SCRIPT + ID_CLOSE_TAG; processToken(); TokenizerString prependingSrc; if ( !parser->skipMode() ) { if (cs) { //kdDebug( 6036 ) << "cachedscript extern!" << endl; //kdDebug( 6036 ) << "src: *" << QString( src.current(), src.length() ).latin1() << "*" << endl; //kdDebug( 6036 ) << "pending: *" << pendingSrc.latin1() << "*" << endl; pendingSrc.prepend(src); setSrc(TokenizerString()); scriptCodeSize = scriptCodeResync = 0; cs->ref(this); // will be 0 if script was already loaded and ref() executed it if (cachedScript.count()) loadingExtScript = true; } else if (view && doScriptExec && javascript ) { if (!m_executingScript) pendingSrc.prepend(src); else prependingSrc = src; setSrc(TokenizerString()); scriptCodeSize = scriptCodeResync = 0; //QTime dt; //dt.start(); scriptExecution( exScript, QString::null, scriptStartLineno ); //kdDebug( 6036 ) << "script execution time:" << dt.elapsed() << endl; } } script = false; scriptCodeSize = scriptCodeResync = 0; if ( !m_executingScript && !loadingExtScript ) { // kdDebug( 6036 ) << "adding pending Output to parsed string" << endl; src.append(pendingSrc); pendingSrc.clear(); } else if (!prependingSrc.isEmpty()) write(prependingSrc, false);}void HTMLTokenizer::scriptExecution( const QString& str, QString scriptURL, int baseLine){#if APPLE_CHANGES if (!view || !view->part()) return;#endif bool oldscript = script; m_executingScript++; script = false; QString url; if (scriptURL.isNull()) url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL(); else url = scriptURL; view->part()->executeScript(url,baseLine,Node(),str); m_executingScript--; script = oldscript;}void HTMLTokenizer::parseComment(TokenizerString &src){ checkScriptBuffer(src.length()); while ( !src.isEmpty() ) { scriptCode[ scriptCodeSize++ ] = *src;#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("comment is now: *%s*", QConstString((QChar*)src.current(), QMIN(16, src.length())).string().latin1());#endif if (src->unicode() == '>' && ( ( brokenComments && !( script || style ) ) || ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-' ) || // Other browsers will accept --!> as a close comment, even though it's // not technically valid. ( scriptCodeSize > 3 && scriptCode[scriptCodeSize-4] == '-' && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '!' ) ) ) { ++src; if ( !( script || xmp || textarea || style) ) {#ifdef COMMENTS_IN_DOM checkScriptBuffer(); scriptCode[ scriptCodeSize ] = 0; scriptCode[ scriptCodeSize + 1 ] = 0; currToken.id = ID_COMMENT; processListing(TokenizerString(scriptCode, scriptCodeSize - 2)); processToken(); currToken.id = ID_COMMENT + ID_CLOSE_TAG; processToken();#endif scriptCodeSize = 0; } comment = false; return; // Finished parsing comment } ++src; }}void HTMLTokenizer::parseServer(TokenizerString &src){ checkScriptBuffer(src.length()); while ( !src.isEmpty() ) { scriptCode[ scriptCodeSize++ ] = *src; if (src->unicode() == '>' && scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') { ++src; server = false; scriptCodeSize = 0; return; // Finished parsing server include } ++src; }}void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src){ char oldchar = 0; while ( !src.isEmpty() ) { unsigned char chbegin = src->latin1(); if(chbegin == '\'') { tquote = tquote == SingleQuote ? NoQuote : SingleQuote; } else if(chbegin == '\"') { tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote; } // Look for '?>' // some crappy sites omit the "?" before it, so // we look for an unquoted '>' instead. (IE compatible) else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) ) { // We got a '?>' sequence processingInstruction = false; ++src; discard=LFDiscard; return; // Finished parsing comment! } ++src; oldchar = chbegin; }}void HTMLTokenizer::parseText(TokenizerString &src){ while ( !src.isEmpty() ) { // do we need to enlarge the buffer? checkBuffer(); // ascii is okay because we only do ascii comparisons unsigned char chbegin = src->latin1(); if (skipLF && ( chbegin != '\n' )) { skipLF = false; } if (skipLF) { skipLF = false; ++src; } else if (( chbegin == '\n' ) || ( chbegin == '\r' )) { if (chbegin == '\r') skipLF = true; *dest++ = '\n'; ++src; } else { *dest = *src; fixUpChar(*dest); ++dest; ++src; } }}void HTMLTokenizer::parseEntity(TokenizerString &src, QChar *&dest, bool start){ if( start ) { cBufferPos = 0; Entity = SearchEntity; EntityUnicodeValue = 0; } while( !src.isEmpty() ) { ushort cc = src->unicode(); switch(Entity) { case NoEntity: assert(Entity != NoEntity); return; case SearchEntity: if(cc == '#') { cBuffer[cBufferPos++] = cc; ++src; Entity = NumericSearch; } else Entity = EntityName; break; case NumericSearch: if(cc == 'x' || cc == 'X') { cBuffer[cBufferPos++] = cc; ++src; Entity = Hexadecimal; } else if(cc >= '0' && cc <= '9') Entity = Decimal; else Entity = SearchSemicolon; break; case Hexadecimal: { int ll = kMin(src.length(), 8); while(ll--) { QChar csrc(src->lower()); cc = csrc.cell(); if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) { break; } EntityUnicodeValue = EntityUnicodeValue*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10)); cBuffer[cBufferPos++] = cc; ++src; } Entity = SearchSemicolon; break; } case Decimal: { int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { cc = src->cell(); if(src->row() || !(cc >= '0' && cc <= '9')) { Entity = SearchSemicolon; break; } EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0'); cBuffer[cBufferPos++] = cc; ++src; } if(cBufferPos == 9) Entity = SearchSemicolon; break; } case EntityName: { int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { QChar csrc = *src; cc = csrc.cell(); if(csrc.row() || !((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) { Entity = SearchSemicolon; break; } cBuffer[cBufferPos++] = cc; ++src; } if(cBufferPos == 9) Entity = SearchSemicolon; if(Entity == SearchSemicolon) { if(cBufferPos > 1) { const entity *e = findEntity(cBuffer, cBufferPos); if(e) EntityUnicodeValue = e->code; // be IE compatible if(tag && EntityUnicodeValue > 255 && *src != ';') EntityUnicodeValue = 0; } } else break; } case SearchSemicolon: //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl; // Don't allow surrogate code points, or values that are more than 21 bits. if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800) || (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) { if (*src == ';') ++src; if (EntityUnicodeValue <= 0xFFFF) { QChar c(EntityUnicodeValue); fixUpChar(c); checkBuffer(); src.push(c); } else { // Convert to UTF-16, using surrogate code points. QChar c1(0xD800 | (((EntityUnicodeValue >> 16) - 1) << 6) | ((EntityUnicodeValue >> 10) & 0x3F)); QChar c2(0xDC00 | (EntityUnicodeValue & 0x3FF)); checkBuffer(2); src.push(c1); src.push(c2); } } else {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "unknown entity!" << endl;#endif checkBuffer(10); // ignore the sequence, add it to the buffer as plaintext *dest++ = '&'; for(unsigned int i = 0; i < cBufferPos; i++) dest[i] = cBuffer[i]; dest += cBufferPos; if (pre) prePos += cBufferPos+1; } Entity = NoEntity; return; } }}void HTMLTokenizer::parseTag(TokenizerString &src){ assert(!Entity ); while ( !src.isEmpty() ) { checkBuffer();#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 uint l = 0; while(l < src.length() && (*(src.current()+l)).latin1() != '>') l++; qDebug("src is now: *%s*, tquote: %d", QConstString((QChar*)src.current(), l).string().latin1(), tquote);#endif switch(tag) { case NoTag: { return; } case TagName: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("TagName");#endif if (searchCount > 0) { if (*src == commentStart[searchCount]) { searchCount++; if (searchCount == 4) {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "Found comment" << endl;#endif // Found '<!--' sequence ++src; dest = buffer; // ignore the previous part of this tag comment = true; tag = NoTag; // Fix bug 34302 at kde.bugs.org. Go ahead and treat // <!--> as a valid comment, since both mozilla and IE on windows // can handle this case. Only do this in quirks mode. -dwh if (!src.isEmpty() && *src == '>' && parser->doc()->inCompatMode()) { comment = false; ++src; if (!src.isEmpty()) cBuffer[cBufferPos++] = src->cell(); } else parseComment(src); return; // Finished parsing tag! } // cuts of high part, is okay cBuffer[cBufferPos++] = src->cell(); ++src; break; } else searchCount = 0; // Stop looking for '<!--' sequence } bool finish = false; unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos); while(ll--) { ushort curchar = *src; if(curchar <= ' ' || curchar == '>' ) { finish = true; break; } // Use tolower() instead of | 0x20 to lowercase the char because there is no // performance gain in using | 0x20 since tolower() is optimized and // | 0x20 turns characters such as '_' into junk. cBuffer[cBufferPos++] = tolower(curchar); ++src; } // Disadvantage: we add the possible rest of the tag // as attribute names. ### judge if this causes problems if(finish || CBUFLEN == cBufferPos) { bool beginTag; char* ptr = cBuffer; unsigned int len = cBufferPos; cBuffer[cBufferPos] = '\0'; if ((cBufferPos > 0) && (*ptr == '/')) { // End Tag beginTag = false; ptr++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -