📄 htmltokenizer.cpp
字号:
m_executingScript--; script = oldscript;}void HTMLTokenizer::parseComment(TokenizerString &src){ // SGML strict bool strict = parser->doc()->inStrictMode() && parser->doc()->htmlMode() != DocumentImpl::XHtml && !script && !style; int delimiterCount = 0; bool canClose = false; checkScriptBuffer(src.length()); while ( src.length() ) { scriptCode[ scriptCodeSize++ ] = *src;#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("comment is now: *%s*", src.toString().left(16).latin1());#endif if (strict) { if (src->unicode() == '-') { delimiterCount++; if (delimiterCount == 2) { delimiterCount = 0; canClose = !canClose; } } else delimiterCount = 0; } if ((!strict || canClose) && src->unicode() == '>') { bool handleBrokenComments = brokenComments && !( script || style ); bool scriptEnd=false; if (!strict) { if ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' && scriptCode[scriptCodeSize-2] == '-' ) scriptEnd=true; } if (canClose || handleBrokenComments || scriptEnd ){ ++src; if ( !( script || xmp || textarea || style) ) {#ifdef COMMENTS_IN_DOM checkScriptBuffer(); scriptCode[ scriptCodeSize ] = 0; scriptCode[ scriptCodeSize + 1 ] = 0; currToken.tid = ID_COMMENT; processListing(DOMStringIt(scriptCode, scriptCodeSize - 2)); processToken(); currToken.tid = ID_COMMENT + ID_CLOSE_TAG; processToken();#endif scriptCodeSize = 0; } comment = false; return; // Finished parsing comment } } ++src; }}void HTMLTokenizer::parseServer(TokenizerString &src){ checkScriptBuffer(src.length()); while ( !src.isEmpty() ) { scriptCode[ scriptCodeSize++ ] = *src; if (src->unicode() == '>' && scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') { ++src; server = false; scriptCodeSize = 0; return; // Finished parsing server include } ++src; }}void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src){ char oldchar = 0; while ( !src.isEmpty() ) { unsigned char chbegin = src->latin1(); if(chbegin == '\'') { tquote = tquote == SingleQuote ? NoQuote : SingleQuote; } else if(chbegin == '\"') { tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote; } // Look for '?>' // some crappy sites omit the "?" before it, so // we look for an unquoted '>' instead. (IE compatible) else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) ) { // We got a '?>' sequence processingInstruction = false; ++src; discard=LFDiscard; return; // Finished parsing comment! } ++src; oldchar = chbegin; }}void HTMLTokenizer::parseText(TokenizerString &src){ while ( !src.isEmpty() ) { // do we need to enlarge the buffer? checkBuffer(); // ascii is okay because we only do ascii comparisons unsigned char chbegin = src->latin1(); if (skipLF && ( chbegin != '\n' )) { skipLF = false; } if (skipLF) { skipLF = false; ++src; } else if (( chbegin == '\n' ) || ( chbegin == '\r' )) { if (chbegin == '\r') skipLF = true; *dest++ = '\n'; ++src; } else { *dest++ = *src; ++src; } }}void HTMLTokenizer::parseEntity(TokenizerString &src, QChar *&dest, bool start){ if( start ) { cBufferPos = 0; Entity = SearchEntity; } while( !src.isEmpty() ) { ushort cc = src->unicode(); switch(Entity) { case NoEntity: return; break; case SearchEntity: if(cc == '#') { cBuffer[cBufferPos++] = cc; ++src; Entity = NumericSearch; } else Entity = EntityName; break; case NumericSearch: if(cc == 'x' || cc == 'X') { cBuffer[cBufferPos++] = cc; ++src; Entity = Hexadecimal; } else if(cc >= '0' && cc <= '9') Entity = Decimal; else Entity = SearchSemicolon; break; case Hexadecimal: { int uc = EntityChar.unicode(); int ll = kMin<uint>(src.length(), 8); while(ll--) { QChar csrc(src->lower()); cc = csrc.cell(); if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) { break; } uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10)); cBuffer[cBufferPos++] = cc; ++src; } EntityChar = QChar(uc); Entity = SearchSemicolon; break; } case Decimal: { int uc = EntityChar.unicode(); int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { cc = src->cell(); if(src->row() || !(cc >= '0' && cc <= '9')) { Entity = SearchSemicolon; break; } uc = uc * 10 + (cc - '0'); cBuffer[cBufferPos++] = cc; ++src; } EntityChar = QChar(uc); if(cBufferPos == 9) Entity = SearchSemicolon; break; } case EntityName: { int ll = kMin(src.length(), 9-cBufferPos); while(ll--) { QChar csrc = *src; cc = csrc.cell(); if(csrc.row() || !((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) { Entity = SearchSemicolon; break; } cBuffer[cBufferPos++] = cc; ++src; // be IE compatible and interpret even unterminated entities // outside tags. like "foo  stuff bla". if ( tag == NoTag ) { const entity* e = kde_findEntity(cBuffer, cBufferPos); if ( e && e->code < 256 ) { Entity = SearchSemicolon; break; } } } if(cBufferPos == 9) Entity = SearchSemicolon; if(Entity == SearchSemicolon) { if(cBufferPos > 1) { const entity *e = kde_findEntity(cBuffer, cBufferPos); if(e && ( e->code < 256 || *src == ';' )) EntityChar = e->code; } } break; } case SearchSemicolon:#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << endl;#endif fixUpChar(EntityChar); if (*src == ';') ++src; if ( !EntityChar.isNull() ) { checkBuffer(); // Just insert it src.push( EntityChar ); } else {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "unknown entity!" << endl;#endif checkBuffer(10); // ignore the sequence, add it to the buffer as plaintext *dest++ = '&'; for(unsigned int i = 0; i < cBufferPos; i++) dest[i] = cBuffer[i]; dest += cBufferPos; Entity = NoEntity; if (pre) prePos += cBufferPos+1; } Entity = NoEntity; EntityChar = QChar::null; return; }; }}void HTMLTokenizer::parseTag(TokenizerString &src){ assert(!Entity ); checkScriptBuffer( src.length() ); while ( !src.isEmpty() ) { checkBuffer();#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 uint l = 0; while(l < src.length() && (src.toString()[l]).latin1() != '>') l++; qDebug("src is now: *%s*, tquote: %d", src.toString().left(l).latin1(), tquote);#endif switch(tag) { case NoTag: return; case TagName: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("TagName");#endif if (searchCount > 0) { if (*src == commentStart[searchCount]) { searchCount++; if (searchCount == 4) {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "Found comment" << endl;#endif // Found '<!--' sequence ++src; dest = buffer; // ignore the previous part of this tag tag = NoTag; comment = true; parseComment(src); return; // Finished parsing tag! } // cuts of high part, is okay cBuffer[cBufferPos++] = src->cell(); ++src; break; } else searchCount = 0; // Stop looking for '<!--' sequence } bool finish = false; unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos); while(ll--) { ushort curchar = *src; if(curchar <= ' ' || curchar == '>' ) { finish = true; break; } // this is a nasty performance trick. will work for the A-Z // characters, but not for others. if it contains one, // we fail anyway char cc = curchar; cBuffer[cBufferPos++] = cc | 0x20; ++src; } // Disadvantage: we add the possible rest of the tag // as attribute names. ### judge if this causes problems if(finish || CBUFLEN == cBufferPos) { bool beginTag; char* ptr = cBuffer; unsigned int len = cBufferPos; cBuffer[cBufferPos] = '\0'; if ((cBufferPos > 0) && (*ptr == '/')) { // End Tag beginTag = false; ptr++; len--; } else // Start Tag beginTag = true; // Accept empty xml tags like <br/> if(len > 1 && ptr[len-1] == '/' ) { ptr[--len] = '\0'; // if its like <br/> and not like <input/ value=foo>, take it as flat if (*src == '>') currToken.flat = true; } uint tagID = khtml::getTagID(ptr, len); if (!tagID) {#ifdef TOKEN_DEBUG QCString tmp(ptr, len+1); kdDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"" << endl;#endif dest = buffer; } else {#ifdef TOKEN_DEBUG QCString tmp(ptr, len+1); kdDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data() << endl;#endif currToken.tid = beginTag ? tagID : tagID + ID_CLOSE_TAG; dest = buffer; } tag = SearchAttribute; cBufferPos = 0; } break; } case SearchAttribute: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("SearchAttribute");#endif bool atespace = false; ushort curchar; while(!src.isEmpty()) { curchar = *src; if(curchar > ' ') { if(curchar == '<' || curchar == '>') tag = SearchEnd; else if(atespace && (curchar == '\'' || curchar == '"')) { tag = SearchValue; *dest++ = 0; attrName = QString::null; } else tag = AttributeName; cBufferPos = 0; break; } atespace = true; ++src; } break; } case AttributeName: {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -