📄 htmltokenizer.cpp
字号:
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("AttributeName");#endif ushort curchar; int ll = kMin(src.length(), CBUFLEN-cBufferPos); while(ll--) { curchar = *src; if(curchar <= '>') { if(curchar <= ' ' || curchar == '=' || curchar == '>') { unsigned int a; cBuffer[cBufferPos] = '\0'; a = khtml::getAttrID(cBuffer, cBufferPos); if ( !a ) attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data()); dest = buffer; *dest++ = a;#ifdef TOKEN_DEBUG if (!a || (cBufferPos && *cBuffer == '!')) kdDebug( 6036 ) << "Unknown attribute: *" << QCString(cBuffer, cBufferPos+1).data() << "*" << endl; else kdDebug( 6036 ) << "Known attribute: " << QCString(cBuffer, cBufferPos+1).data() << endl;#endif // did we just get /> if (!a && cBufferPos == 1 && *cBuffer == '/' && curchar == '>') currToken.flat = true; tag = SearchEqual; break; } } cBuffer[cBufferPos++] = ( curchar >= 'A' && curchar <= 'Z' ) ? curchar | 0x20 : curchar; ++src; } if ( cBufferPos == CBUFLEN ) { cBuffer[cBufferPos] = '\0'; attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data()); dest = buffer; *dest++ = 0; tag = SearchEqual; } break; } case SearchEqual: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("SearchEqual");#endif ushort curchar; bool atespace = false; while(!src.isEmpty()) { curchar = src->unicode(); if(curchar > ' ') { if(curchar == '=') {#ifdef TOKEN_DEBUG kdDebug(6036) << "found equal" << endl;#endif tag = SearchValue; ++src; } else if(atespace && (curchar == '\'' || curchar == '"')) { tag = SearchValue; *dest++ = 0; attrName = QString::null; } else { DOMString v(""); currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v); dest = buffer; tag = SearchAttribute; } break; } atespace = true; ++src; } break; } case SearchValue: { ushort curchar; while(!src.isEmpty()) { curchar = src->unicode(); if(curchar > ' ') { if(( curchar == '\'' || curchar == '\"' )) { tquote = curchar == '\"' ? DoubleQuote : SingleQuote; tag = QuotedValue; ++src; } else tag = Value; break; } ++src; } break; } case QuotedValue: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("QuotedValue");#endif ushort curchar; while(!src.isEmpty()) { checkBuffer(); curchar = src->unicode(); if(curchar <= '\'' && !src.escaped()) { // ### attributes like '&{blaa....};' are supposed to be treated as jscript. if ( curchar == '&' ) { ++src; parseEntity(src, dest, true); break; } else if ( (tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"') ) { // some <input type=hidden> rely on trailing spaces. argh while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r')) dest--; // remove trailing newlines DOMString v(buffer+1, dest-buffer-1); currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v); dest = buffer; tag = SearchAttribute; tquote = NoQuote; ++src; break; } } *dest++ = *src; ++src; } break; } case Value: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("Value");#endif ushort curchar; while(!src.isEmpty()) { checkBuffer(); curchar = src->unicode(); if(curchar <= '>' && !src.escaped()) { // parse Entities if ( curchar == '&' ) { ++src; parseEntity(src, dest, true); break; } // no quotes. Every space means end of value // '/' does not delimit in IE! if ( curchar <= ' ' || curchar == '>' ) { DOMString v(buffer+1, dest-buffer-1); currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v); dest = buffer; tag = SearchAttribute; break; } } *dest++ = *src; ++src; } break; } case SearchEnd: {#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 qDebug("SearchEnd");#endif while(!src.isEmpty()) { if(*src == '<' || *src == '>') break; if (*src == '/') currToken.flat = true; ++src; } if(src.isEmpty() && *src != '<' && *src != '>') break; searchCount = 0; // Stop looking for '<!--' sequence tag = NoTag; tquote = NoQuote; if ( *src == '>' ) ++src; if ( !currToken.tid ) //stop if tag is unknown return; uint tagID = currToken.tid;#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0 kdDebug( 6036 ) << "appending Tag: " << tagID << endl;#endif // If the tag requires an end tag it cannot be flat, // unless we are using the HTML parser to parse XHTML // The only exception is SCRIPT and priority 0 tokens. if (tagID < ID_CLOSE_TAG && tagID != ID_SCRIPT && DOM::endTag[tagID] == DOM::REQUIRED && parser->doc()->htmlMode() != DocumentImpl::XHtml) currToken.flat = false; bool beginTag = !currToken.flat && (tagID < ID_CLOSE_TAG); if(tagID >= ID_CLOSE_TAG) tagID -= ID_CLOSE_TAG; else if ( !brokenScript && tagID == ID_SCRIPT ) { DOMStringImpl* a = 0; bool foundTypeAttribute = false; scriptSrc = scriptSrcCharset = QString::null; if ( currToken.attrs && /* potentially have a ATTR_SRC ? */ view && /* are we a regular tokenizer or just for innerHTML ? */ parser->doc()->view()->part()->jScriptEnabled() /* jscript allowed at all? */ ) { if ( ( a = currToken.attrs->getValue( ATTR_SRC ) ) ) scriptSrc = parser->doc()->completeURL(khtml::parseURL( DOMString(a) ).string() ); if ( ( a = currToken.attrs->getValue( ATTR_CHARSET ) ) ) scriptSrcCharset = DOMString(a).string().stripWhiteSpace(); if ( scriptSrcCharset.isEmpty() && view) scriptSrcCharset = parser->doc()->view()->part()->encoding(); /* Check type before language, since language is deprecated */ if ((a = currToken.attrs->getValue(ATTR_TYPE)) != 0 && !DOMString(a).string().isEmpty()) foundTypeAttribute = true; else a = currToken.attrs->getValue(ATTR_LANGUAGE); } javascript = true; if( foundTypeAttribute ) { /* Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does. Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does. Mozilla 1.5 accepts application/x-javascript, WinIE 6 doesn't. Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't. Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string. We want to accept all the values that either of these browsers accept, but not other values. */ QString type = DOMString(a).string().stripWhiteSpace().lower(); if( type.compare("text/javascript") != 0 && type.compare("text/javascript1.0") != 0 && type.compare("text/javascript1.1") != 0 && type.compare("text/javascript1.2") != 0 && type.compare("text/javascript1.3") != 0 && type.compare("text/javascript1.4") != 0 && type.compare("text/javascript1.5") != 0 && type.compare("text/jscript") != 0 && type.compare("text/ecmascript") != 0 && type.compare("text/livescript") != 0 && type.compare("application/x-javascript") != 0 && type.compare("application/ecmascript") != 0 ) javascript = false; } else if( a ) { /* Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does. Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3. Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace. We want to accept all the values that either of these browsers accept, but not other values. */ QString lang = DOMString(a).string(); lang = lang.lower(); if( lang.compare("") != 0 && lang.compare("javascript") != 0 && lang.compare("javascript1.0") != 0 && lang.compare("javascript1.1") != 0 && lang.compare("javascript1.2") != 0 && lang.compare("javascript1.3") != 0 && lang.compare("javascript1.4") != 0 && lang.compare("javascript1.5") != 0 && lang.compare("ecmascript") != 0 && lang.compare("livescript") != 0 && lang.compare("jscript") ) javascript = false; } } processToken(); if ( parser->selectMode() && beginTag) discard = AllDiscard; switch( tagID ) { case ID_PRE: pre = beginTag; if (beginTag) discard = LFDiscard; prePos = 0; break; case ID_BR: prePos = 0; break; case ID_SCRIPT: if (beginTag) { searchStopper = scriptEnd; searchStopperLen = 8; script = true; parseSpecial(src); } else if (tagID < ID_CLOSE_TAG) // Handle <script src="foo"/> scriptHandler(); break; case ID_STYLE: if (beginTag) { searchStopper = styleEnd; searchStopperLen = 7; style = true; parseSpecial(src); } break; case ID_TEXTAREA: if(beginTag) { searchStopper = textareaEnd; searchStopperLen = 10; textarea = true; discard = NoneDiscard; parseSpecial(src); } break; case ID_TITLE: if (beginTag) { searchStopper = titleEnd; searchStopperLen = 7; title = true; parseSpecial(src); } break; case ID_XMP: if (beginTag) { searchStopper = xmpEnd; searchStopperLen = 5; xmp = true; parseSpecial(src); } break; case ID_SELECT: select = beginTag; break; case ID_PLAINTEXT: plaintext = beginTag; break; } return; // Finished parsing tag! } } // end switch } return;}void HTMLTokenizer::addPending(){ if ( select && !(comment || script)) { *dest++ = ' '; } else if ( textarea ) { switch(pending) { case LFPending: *dest++ = '\n'; prePos = 0; break; case SpacePending: *dest++ = ' '; ++prePos; break; case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break; case NonePending: assert(0); } } else { int p; switch (pending) { case SpacePending: // Insert a breaking space *dest++ = QChar(' '); prePos++; break; case LFPending: *dest = '\n'; dest++; prePos = 0; break; case TabPending: p = TAB_SIZE - ( prePos % TAB_SIZE ); for ( int x = 0; x < p; x++ ) *dest++ = QChar(' '); prePos += p; break; case NonePending: assert(0); break; } } pending = NonePending;}void HTMLTokenizer::write( const TokenizerString &str, bool appendData ){#ifdef TOKEN_DEBUG kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;#endif if ( !buffer ) return; if ( ( m_executingScript && appendData ) || ( !m_executingScript && cachedScript.count() ) ) { // don't parse; we will do this later pendingSrc.append(str); return; } if ( onHold ) { src.append(str); return; } setSrc(str); m_abort = false;// if (Entity)// parseEntity(src, dest); while ( !src.isEmpty() ) { if ( m_abort ) return; // do we need to enlarge the buffer? checkBuffer(); ushort cc = src->unicode();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -