📄 htmltokenizer.cpp
字号:
kdDebug( 6036 ) << "Known attribute: \"" << attrName << "\"" << endl;#endif } tag = SearchEqual; discard = SpaceDiscard; // discard spaces before '=' } break; } case SearchEqual: { if( tquote ) {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "bad HTML in parseTag: SearchEqual" << endl;#endif // this is moslty due to a missing '"' somewhere before.. // so let's start searching for a new tag tquote = NoQuote; Attribute a; a.id = *buffer; if(a.id==0) a.setName( attrName ); a.setValue(0, 0); currToken->attrs.add(a); dest = buffer; tag = SearchAttribute; discard = SpaceDiscard; pending = NonePending; } else if( curchar == '=' ) {#ifdef TOKEN_DEBUG kdDebug(6036) << "found equal" << endl;#endif tag = SearchValue; pending = NonePending; // ignore spaces before '=' discard = SpaceDiscard; // discard spaces after '=' ++src; } else if( curchar == '>' ) tag = SearchEnd; else // other chars indicate a new attribte { Attribute a; a.id = *buffer; if(a.id==0) a.setName( attrName ); a.setValue(0, 0); currToken->attrs.add(a); dest = buffer; tag = SearchAttribute; discard = SpaceDiscard; pending = NonePending; } break; } case SearchValue: { if(tquote) { tag = QuotedValue; } else { tag = Value; } pending = NonePending; break; } case QuotedValue: { // ### attributes like '&{blaa....};' are supposed to be treated as jscript. if ( curchar == '&' ) { ++src; discard = NoneDiscard; if (pending) addPending(); charEntity = true; parseEntity(src, dest, true); break; } else if ( !tquote ) { // end of attribute Attribute a; a.id = *buffer; if(a.id==0) a.setName( attrName ); while(*(dest-1) == ' ' && dest>buffer+1) dest--; // remove trailing spaces a.setValue(buffer+1, dest-buffer-1);#ifdef TOKEN_DEBUG kdDebug() << "adding value: *" << QConstString(buffer+1, dest-buffer-1).string() << "*" << endl;#endif currToken->attrs.add(a); dest = buffer; tag = SearchAttribute; discard = SpaceDiscard; pending = NonePending; break; } if( pending ) addPending(); discard = NoneDiscard; *dest++ = src[0]; ++src; break; } case Value: { if( tquote ) { // additional quote. discard it, and define as end of // attribute#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "bad HTML in parseTag: Value" << endl;#endif tquote = NoQuote; } // if discard==NoneDiscard at this point, it means // that we passed an empty "" pair. bit hacky, but... // helps with <tag attr=""otherattr="something"> if ( pending || src[0].latin1() == '>' || discard==NoneDiscard) { // no quotes. Every space means end of value Attribute a; a.id = *buffer; if(a.id==0) a.setName( attrName ); a.setValue(buffer+1, dest-buffer-1);#ifdef TOKEN_DEBUG kdDebug() << "adding value: *" << QConstString(buffer+1, dest-buffer-1).string() << "*" << endl;#endif currToken->attrs.add(a); dest = buffer; tag = SearchAttribute; discard = SpaceDiscard; pending = NonePending; break; } *dest++ = src[0]; ++src; break; } case SearchEnd: { if ( curchar != '>') { ++src; // discard everything, until we found the end break; } searchCount = 0; // Stop looking for '<!--' sequence tag = NoTag; tquote = NoQuote; pending = NonePending; // Ignore pending spaces ++src; if ( currToken->id == 0 ) //stop if tag is unknown { discard = NoneDiscard; *dest = QChar::null; return; } if(dest>buffer) { // add the last attribute Attribute a; a.id = *buffer; if(a.id==0) a.setName( attrName ); a.setValue(buffer+1, dest-buffer-1); currToken->attrs.add(a); dest = buffer; } uint tagID = currToken->id;#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "appending Tag: " << tagID << endl;#endif bool beginTag = (tagID < ID_CLOSE_TAG); if( beginTag && tagID != ID_IMG && tagID != ID_INPUT ) { // Ignore Space/LF's after a start tag discard = LFDiscard; } else if (!beginTag) { // Don't ignore CR/LF's after a close tag discard = NoneDiscard; tagID -= ID_CLOSE_TAG; } if ( tagID == ID_SCRIPT && beginTag ) { int attrIndex = currToken->attrs.find(ATTR_SRC); scriptSrc = (attrIndex == -1 ? (QString)"" : currToken->attrs[attrIndex]->value().string()); attrIndex = currToken->attrs.find(ATTR_LANGUAGE); javascript = true; if( attrIndex != -1 ) { QString lang = currToken->attrs[attrIndex]->value().string(); lang = lang.lower(); if( !lang.contains("javascript") && !lang.contains("ecmascript") && !lang.contains("jscript") ) javascript = false; } else { attrIndex = currToken->attrs.find(ATTR_TYPE); if( attrIndex != -1 ) { QString lang = currToken->attrs[attrIndex]->value().string(); lang = lang.lower(); if( !lang.contains("javascript") && !lang.contains("ecmascript") && !lang.contains("jscript") ) javascript = false; } } } processToken(); if(pre) { // we have to take care to close the pre block in // case we encounter an unallowed element.... if(!DOM::checkChild(ID_PRE, tagID)) { //kdDebug(0) << " not allowed in <pre> " << (int)tagID << endl; pre = false; } } if ( tagID == ID_PRE ) { prePos = 0; pre = beginTag; } else if ( tagID == ID_TEXTAREA ) { if(beginTag) { listing = true; textarea = true; searchCount = 0; searchFor = textareaEnd; scriptCode = QT_ALLOC_QCHAR_VEC( 1024 ); scriptCodeSize = 0; scriptCodeMaxSize = 1024; parseListing(src); } } else if ( tagID == ID_SCRIPT ) { if (beginTag) {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "start of script, token->id = " << currToken->id << endl;#endif script = true; searchCount = 0; searchFor = scriptEnd; scriptCode = QT_ALLOC_QCHAR_VEC( 1024 ); scriptCodeSize = 0; scriptCodeMaxSize = 1024; parseScript(src);#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "end of script" << endl;#endif } } else if ( tagID == ID_STYLE ) { if (beginTag) { style = true; searchCount = 0; searchFor = styleEnd; scriptCode = QT_ALLOC_QCHAR_VEC( 1024 ); scriptCodeSize = 0; scriptCodeMaxSize = 1024; parseStyle(src); } } else if ( tagID == ID_LISTING ) { if (beginTag) { listing = true; searchCount = 0; searchFor = listingEnd; scriptCode = QT_ALLOC_QCHAR_VEC( 1024 ); scriptCodeSize = 0; scriptCodeMaxSize = 1024; parseListing(src); } } else if ( tagID == ID_SELECT ) { select = beginTag; } return; // Finished parsing tag! } default: {#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "error in parseTag! " << __LINE__ << endl;#endif return; } } // end switch } } return;}void HTMLTokenizer::addPending(){ if ( tag || select) { *dest++ = ' '; } else if ( textarea ) { if (pending == LFPending) *dest++ = '\n'; else *dest++ = ' '; } else if ( pre ) { int p; switch (pending) { case SpacePending: // Insert a non-breaking space *dest++ = QChar(' '); prePos++; break; case LFPending: *dest = '\n'; dest++; prePos = 0; break; case TabPending: p = TAB_SIZE - ( prePos % TAB_SIZE ); for ( int x = 0; x < p; x++ ) { *dest = QChar(' '); dest++; } prePos += p; break; default:#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "Assertion failed: pending = " << (int) pending << endl;#endif break; } } else { *dest++ = ' '; } pending = NonePending;}void HTMLTokenizer::setPlainText(){ if (!plaintext) { // Do this only once! plaintext = true; currToken->id = ID_PLAIN; processToken(); dest = buffer; }}void HTMLTokenizer::write( const QString &str ){ // we have to make this function reentrant. This is needed, because some // script code could call document.write(), which would add something here.#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "Tokenizer::write(\"" << str << "\")" << endl;#endif#if DEBUG_BY_XHTANG fprintf(stderr,"HTMLTokenizer::write strEmpty:%d buffer:%p\n",str.isEmpty(),buffer); fprintf(stderr,"HTMLTokenizer::write str:%s\n",(const char*)str);#endif if ( str.isEmpty() || buffer == 0L ) return; // reentrant... // we just insert the code at the tokenizers current position. Parsing will continue once // we return from the script stuff // (this won't happen if we're in the middle of loading an external script) if(executingScript) {#if DEBUG_BY_XHTANG fprintf(stderr,"HTMLTokenizer::write executingScript\n");#endif#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "adding to scriptOutput" << endl;#endif scriptOutput += str; return; } if (loadingExtScript) {#if DEBUG_BY_XHTANG fprintf(stderr,"HTMLTokenizer::write loadingExtScript\n"); #endif // don't parse; we will do this later pendingSrc += str; return; } _src = str; src = DOMStringIt(_src); if(!currToken) currToken = new Token; if (plaintext) parseText(src); else if (comment)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -