📄 htmltokenizer.cpp
字号:
dest = buffer;
*dest++ = a;
#ifdef TOKEN_DEBUG
if (!a || (cBufferPos && *cBuffer == '!'))
kdDebug( 6036 ) << "Unknown attribute: *" << QCString(cBuffer, cBufferPos+1).data() << "*" << endl;
else
kdDebug( 6036 ) << "Known attribute: " << QCString(cBuffer, cBufferPos+1).data() << endl;
#endif
tag = SearchEqual;
break;
}
}
// Lowercase with tolower() rather than `| 0x20`: tolower() is already
// optimized, so `| 0x20` offers no performance gain, and `| 0x20` also
// turns non-letter characters such as '_' into junk.
cBuffer[cBufferPos++] = tolower(curchar);
++src;
}
if ( cBufferPos == CBUFLEN ) {
cBuffer[cBufferPos] = '\0';
attrName = QString::fromLatin1(QCString(cBuffer, cBufferPos+1).data());
attrNamePresent = !attrName.isEmpty();
dest = buffer;
*dest++ = 0;
tag = SearchEqual;
}
break;
}
case SearchEqual:
{
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
qDebug("SearchEqual");
#endif
ushort curchar;
bool atespace = false;
while(!src.isEmpty()) {
curchar = src->unicode();
// In this mode just ignore any quotes we encounter and treat them like spaces.
if (curchar > ' ' && curchar != '\'' && curchar != '"') {
if(curchar == '=') {
#ifdef TOKEN_DEBUG
kdDebug(6036) << "found equal" << endl;
#endif
tag = SearchValue;
++src;
}
else {
#ifdef NOKIA_CHANGES
// For some reason, RVCT compiler cannot correctly initialize global object DOM::emptyAtom.
currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, AtomicString("") );
#else
currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, emptyAtom );
#endif
dest = buffer;
tag = SearchAttribute;
}
break;
}
atespace = true;
++src;
}
break;
}
case SearchValue:
{
ushort curchar;
while(!src.isEmpty()) {
curchar = src->unicode();
if(curchar > ' ') {
if(( curchar == '\'' || curchar == '\"' )) {
tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
tag = QuotedValue;
++src;
} else
tag = Value;
break;
}
++src;
}
break;
}
case QuotedValue:
{
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
qDebug("QuotedValue");
#endif
ushort curchar;
while(!src.isEmpty()) {
RETURN_IF_OOM( checkBuffer() );
curchar = src->unicode();
if (curchar == '>' && !attrNamePresent) {
// Handle a case like <img '>. Just go ahead and be willing
// to close the whole tag. Don't consume the character and
// just go back into SearchEnd while ignoring the whole
// value.
// FIXME: Note that this is actually not a very good solution. It's
// an interim hack and doesn't handle the general case of
// unmatched quotes among attributes that have names. -dwh
while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
dest--; // remove trailing newlines
AtomicString v(buffer+1, dest-buffer-1);
attrName.setUnicode(buffer+1,dest-buffer-1);
currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
tag = SearchAttribute;
dest = buffer;
tquote = NoQuote;
break;
}
if(curchar <= '\'' && !src.escaped()) {
// ### attributes like '&{blaa....};' are supposed to be treated as jscript.
if ( curchar == '&' )
{
++src;
parseEntity(src, dest, true);
break;
}
else if ( (tquote == SingleQuote && curchar == '\'') ||
(tquote == DoubleQuote && curchar == '\"') )
{
// Some <input type=hidden> values rely on trailing spaces, so only trailing newlines and carriage returns are stripped here.
while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
dest--; // remove trailing newlines
AtomicString v(buffer+1, dest-buffer-1);
if (!attrNamePresent)
attrName.setUnicode(buffer+1,dest-buffer-1);
currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
dest = buffer;
tag = SearchAttribute;
tquote = NoQuote;
++src;
break;
}
}
*dest = *src;
fixUpChar(*dest);
++dest;
++src;
}
break;
}
case Value:
{
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
qDebug("Value");
#endif
ushort curchar;
while(!src.isEmpty()) {
RETURN_IF_OOM( checkBuffer() );
curchar = src->unicode();
if(curchar <= '>' && !src.escaped()) {
// parse Entities
if ( curchar == '&' )
{
++src;
parseEntity(src, dest, true);
break;
}
// no quotes. Every space means end of value
// '/' does not delimit in IE!
if ( curchar <= ' ' || curchar == '>' )
{
AtomicString v(buffer+1, dest-buffer-1);
currToken.addAttribute(parser->docPtr()->document(), buffer, attrName, v);
dest = buffer;
tag = SearchAttribute;
break;
}
}
*dest = *src;
fixUpChar(*dest);
++dest;
++src;
}
break;
}
case SearchEnd:
{
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
qDebug("SearchEnd");
#endif
while(!src.isEmpty()) {
if (*src == '>' || *src == '<')
break;
if (*src == '/')
currToken.flat = true;
++src;
}
if (src.isEmpty()) break;
searchCount = 0; // Stop looking for '<!--' sequence
tag = NoTag;
tquote = NoQuote;
if (*src != '<')
++src;
if ( !currToken.id ) //stop if tag is unknown
return;
uint tagID = currToken.id;
#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
kdDebug( 6036 ) << "appending Tag: " << tagID << endl;
#endif
bool beginTag = !currToken.flat && (tagID <= ID_CLOSE_TAG);
if (tagID > ID_CLOSE_TAG)
tagID -= ID_CLOSE_TAG;
else if (tagID == ID_SCRIPT) {
AttributeImpl* a = 0;
bool foundTypeAttribute = false;
scriptSrc = QString::null;
scriptSrcCharset = QString::null;
if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
parser->doc()->part() &&
parser->doc()->part()->jScriptEnabled() && /* jscript allowed at all? */
view /* are we a regular tokenizer or just for innerHTML ? */
) {
if ( ( a = currToken.attrs->getAttributeItem( ATTR_SRC ) ) )
scriptSrc = parser->doc()->completeURL(parseURL( a->value() ).string() );
if ( ( a = currToken.attrs->getAttributeItem( ATTR_CHARSET ) ) )
scriptSrcCharset = a->value().string().stripWhiteSpace();
if ( scriptSrcCharset.isEmpty() )
scriptSrcCharset = parser->doc()->part()->encoding();
/* Check type before language, since language is deprecated */
if ((a = currToken.attrs->getAttributeItem(ATTR_TYPE)) != 0 && !a->value().string().isEmpty())
foundTypeAttribute = true;
else
a = currToken.attrs->getAttributeItem(ATTR_LANGUAGE);
}
javascript = true;
if( foundTypeAttribute ) {
/*
Mozilla 1.5 accepts application/x-javascript, and some web references claim it is the only
correct variation, but WinIE 6 doesn't accept it.
Neither Mozilla 1.5 nor WinIE 6 accept application/javascript, application/ecmascript, or
application/x-ecmascript.
Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
We want to accept all the values that either of these browsers accept, but not other values.
*/
QString type = a->value().string().stripWhiteSpace().lower();
if( type.compare("application/x-javascript") != 0 &&
type.compare("text/javascript") != 0 &&
type.compare("text/javascript1.0") != 0 &&
type.compare("text/javascript1.1") != 0 &&
type.compare("text/javascript1.2") != 0 &&
type.compare("text/javascript1.3") != 0 &&
type.compare("text/javascript1.4") != 0 &&
type.compare("text/javascript1.5") != 0 &&
type.compare("text/jscript") != 0 &&
type.compare("text/ecmascript") != 0 &&
type.compare("text/livescript") )
javascript = false;
} else if( a ) {
/*
Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
We want to accept all the values that either of these browsers accept, but not other values.
*/
QString lang = a->value().string();
lang = lang.lower();
if( lang.compare("") != 0 &&
lang.compare("javascript") != 0 &&
lang.compare("javascript1.0") != 0 &&
lang.compare("javascript1.1") != 0 &&
lang.compare("javascript1.2") != 0 &&
lang.compare("javascript1.3") != 0 &&
lang.compare("javascript1.4") != 0 &&
lang.compare("javascript1.5") != 0 &&
lang.compare("ecmascript") != 0 &&
lang.compare("livescript") != 0 &&
lang.compare("jscript") )
javascript = false;
}
}
processToken();
// Take care to leave <pre> mode when we encounter the start tag of an
// element that is not allowed inside a <pre> block.
if(pre && beginTag && !DOM::checkChild(ID_PRE, tagID)) {
kdDebug(6036) << " not allowed in <pre> " << (int)tagID << endl;
pre = false;
}
switch( tagID ) {
case ID_PRE:
prePos = 0;
pre = beginTag;
break;
case ID_SCRIPT:
if (beginTag) {
searchStopper = scriptEnd;
searchStopperLen = 8;
script = true;
parseSpecial(src);
}
else if (tagID <= ID_CLOSE_TAG) // Handle <script src="foo"/>
scriptHandler();
break;
case ID_STYLE:
if (beginTag) {
searchStopper = styleEnd;
searchStopperLen = 7;
style = true;
parseSpecial(src);
}
break;
case ID_TEXTAREA:
if(beginTag) {
searchStopper = textareaEnd;
searchStopperLen = 10;
textarea = true;
parseSpecial(src);
}
break;
case ID_TITLE:
if (beginTag) {
searchStopper = titleEnd;
searchStopperLen = 7;
title = true;
parseSpecial(src);
}
break;
case ID_XMP:
if (beginTag) {
searchStopper = xmpEnd;
searchStopperLen = 5;
xmp = true;
parseSpecial(src);
}
break;
case ID_SELECT:
select = beginTag;
break;
case ID_PLAINTEXT:
plaintext = beginTag;
break;
}
if (beginTag && endTagRequirement(tagID) == FORBIDDEN)
// Don't discard LFs since this element has no end tag.
discard = NoneDiscard;
return; // Finished parsing tag!
}
} // end switch
}
return;
}
void HTMLTokenizer::addPending()
{
if ( select && !script )
{
*dest++ = ' ';
}
else if ( textarea || script )
{
switch(pending) {
case LFPending: *dest++ = '\n'; prePos = 0; break;
case SpacePending: *dest++ = ' '; ++prePos; break;
case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break;
case NonePending:
assert(0);
}
}
else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -