📄 htmltokenizer.cpp
字号:
}void HTMLTokenizer::scriptExecution( const QString& str ){ bool oldscript = script; m_executingScript++; script = false; view->part()->executeScript(str); m_executingScript--; script = oldscript;}void HTMLTokenizer::parseComment(DOMStringIt &src){ checkScriptBuffer(src.length()); while ( src.length() ) { scriptCode[ scriptCodeSize++ ] = *src; if (src->unicode() == '>' && ( ( brokenComments && !( script || style || textarea || listing ) ) || ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-' ) ) ) { ++src; if ( !( script || listing || textarea || style) ) {#ifdef COMMENTS_IN_DOM checkScriptBuffer(); scriptCode[ scriptCodeSize ] = 0; scriptCode[ scriptCodeSize + 1 ] = 0; currToken.id = ID_COMMENT; processListing(DOMStringIt(scriptCode, scriptCodeSize - 2)); processToken(); currToken.id = ID_COMMENT + ID_CLOSE_TAG; processToken();#endif scriptCodeSize = 0; } comment = false; return; // Finished parsing comment } ++src; }}void HTMLTokenizer::parseProcessingInstruction(DOMStringIt &src){ char oldchar = 0; while ( src.length() ) { unsigned char chbegin = src->latin1(); if(chbegin == '\'') { tquote = tquote == SingleQuote ? NoQuote : SingleQuote; } else if(chbegin == '\"') { tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote; } // Look for '?>' // some crappy sites omit the "?" before it, so // we look for an unquoted '>' instead. (IE compatible) else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) ) { // We got a '?>' sequence processingInstruction = false; ++src; discard=LFDiscard; return; // Finished parsing comment! } ++src; oldchar = chbegin; }}void HTMLTokenizer::parseText(DOMStringIt &src){ while ( src.length() ) { // do we need to enlarge the buffer? checkBuffer(); // ascii is okay because we only do ascii comparisons unsigned char chbegin = src->latin1(); if (skipLF && ( chbegin != '\n' )) { skipLF = false; } if (skipLF) { skipLF = false; ++src; } else if (( chbegin == '\n' ) || ( chbegin == '\r' )) { if (chbegin == '\r') skipLF = true; *dest++ = '\n'; ++src; } else { *dest++ = *src; ++src; } }}void HTMLTokenizer::parseEntity(DOMStringIt &src, QChar *&dest, bool start){ if( start ) { cBufferPos = 0; Entity = SearchEntity; } while( src.length() ) { ushort cc = src->unicode(); switch(Entity) { case NoEntity: return; break; case SearchEntity: if(cc == '#') { cBuffer[cBufferPos++] = cc; ++src; Entity = NumericSearch; } else Entity = EntityName; break; case NumericSearch: if(cc == 'x' || cc == 'X') { cBuffer[cBufferPos++] = cc; ++src; Entity = Hexadecimal; } else if(cc >= '0' && cc <= '9') Entity = Decimal; else Entity = SearchSemicolon; break; case Hexadecimal: { int uc = EntityChar.unicode(); int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { QChar csrc(src->lower()); cc = csrc.cell(); if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) { Entity = SearchSemicolon; break; } uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10)); cBuffer[cBufferPos++] = cc; ++src; } EntityChar = QChar(uc); if(cBufferPos == 9) Entity = SearchSemicolon; break; } case Decimal: { int uc = EntityChar.unicode(); int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { cc = src->cell(); if(src->row() || !(cc >= '0' && cc <= '9')) { Entity = SearchSemicolon; break; } uc = uc * 10 + (cc - '0'); cBuffer[cBufferPos++] = cc; ++src; } EntityChar = QChar(uc); if(cBufferPos == 9) Entity = SearchSemicolon; break; } case EntityName: { int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { QChar csrc = *src; cc = csrc.cell(); if(csrc.row() || !((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) { Entity = SearchSemicolon; break; } cBuffer[cBufferPos++] = cc; ++src; } if(cBufferPos == 9) Entity = SearchSemicolon; if(Entity == SearchSemicolon) { if(cBufferPos > 1) { const entity *e = findEntity(cBuffer, cBufferPos); if(e) EntityChar = e->code; // be IE compatible if(tag && EntityChar.unicode() > 255 && *src != ';') EntityChar = QChar::null; } } else break; } case SearchSemicolon: //kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << ", " << res << endl; fixUpChar(EntityChar); if ( EntityChar != QChar::null ) { checkBuffer(); // Just insert it if (*src == ';') ++src; src.push( EntityChar ); } else {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "unknown entity!" << endl;#endif checkBuffer(10); // ignore the sequence, add it to the buffer as plaintext *dest++ = '&'; for(unsigned int i = 0; i < cBufferPos; i++) dest[i] = cBuffer[i]; dest += cBufferPos; Entity = NoEntity; if (pre) prePos += cBufferPos+1; } Entity = NoEntity; EntityChar = QChar::null; return; }; }}void HTMLTokenizer::parseTag(DOMStringIt &src){ assert(!Entity ); while ( src.length() ) { checkBuffer();#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 int l = 0; while(l < src.length() && (*(src.current()+l)).latin1() != '>') l++; qDebug("src is now: *%s*, tquote: %d", QConstString((QChar*)src.current(), l).string().latin1(), tquote);#endif switch(tag) { case NoTag: { return; } case TagName: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("TagName");#endif if (searchCount > 0) { if (*src == commentStart[searchCount]) { searchCount++; if (searchCount == 4) {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "Found comment" << endl;#endif // Found '<!--' sequence ++src; dest = buffer; // ignore the previous part of this tag comment = true; tag = NoTag; parseComment(src); return; // Finished parsing tag! } // cuts of high part, is okay cBuffer[cBufferPos++] = src->cell(); ++src; break; } else searchCount = 0; // Stop looking for '<!--' sequence } bool finish = false; unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos); while(ll--) { ushort curchar = *src; if(curchar <= ' ' || curchar == '>' ) { finish = true; break; } // this is a nasty performance trick. will work for the A-Z // characters, but not for others. if it contains one, // we fail anyway char cc = curchar; cBuffer[cBufferPos++] = cc | 0x20; ++src; } // Disadvantage: we add the possible rest of the tag // as attribute names. ### judge if this causes problems if(finish || CBUFLEN == cBufferPos) { bool beginTag; char* ptr = cBuffer; unsigned int len = cBufferPos; cBuffer[cBufferPos] = '\0'; if ((cBufferPos > 0) && (*ptr == '/')) { // End Tag beginTag = false; ptr++; len--; } else // Start Tag beginTag = true; // limited xhtml support. Accept empty xml tags like <br/> if(len > 1 && ptr[len-1] == '/' ) ptr[--len] = '\0'; uint tagID = khtml::getTagID(ptr, len); if (!tagID) {#ifdef TOKEN_DEBUG QCString tmp(ptr, len+1); kdDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"" << endl;#endif dest = buffer; } else {#ifdef TOKEN_DEBUG QCString tmp(ptr, len+1); kdDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data() << endl;#endif currToken.id = beginTag ? tagID : tagID + ID_CLOSE_TAG; dest = buffer; } tag = SearchAttribute; cBufferPos = 0; } break; } case SearchAttribute: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("SearchAttribute");#endif bool atespace = false; ushort curchar; while(src.length()) { curchar = *src; if(curchar > ' ') { if(curchar == '>' || curchar == '/') tag = SearchEnd; else if(atespace && (curchar == '\'' || curchar == '"')) { tag = SearchValue; *dest++ = 0; attrName = QString::null; } else tag = AttributeName; cBufferPos = 0; break; } atespace = true; ++src; } break; } case AttributeName: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("AttributeName");#endif ushort curchar;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -