📄 parserimpl.java
字号:
/* discard unexpected text nodes and end tags */
Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
}
if (HasTitle == 0)
{
Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
}
}
};
/**
 * Parser for the content of a title element.
 *
 * Tokens are read in mixed-content mode until the matching end tag is seen,
 * an unexpected known tag turns up, or the lexer has no more tokens.
 */
public static class ParseTitle implements Parser {

    /**
     * Collects the children of the given title node.
     *
     * @param lexer the lexer supplying tokens
     * @param title the title node whose content is being parsed
     * @param mode  not consulted; tokens are always requested with Lexer.MixedContent
     */
    public void parse( Lexer lexer, Node title, short mode )
    {
        Node token;

        while ((token = lexer.getToken(Lexer.MixedContent)) != null)
        {
            /* matching end tag: the element is complete */
            final boolean closesTitle =
                token.type == Node.EndTag && token.tag == title.tag;
            if (closesTitle)
            {
                title.closed = true;
                Node.trimSpaces(lexer, title);
                return;
            }

            if (token.type == Node.TextNode)
            {
                /* leading space is trimmed only while title has no content yet */
                if (title.content == null)
                {
                    Node.trimInitialSpace(lexer, title, token);
                }

                /* text that is empty after trimming is dropped */
                if (token.start < token.end)
                {
                    Node.insertNodeAtEnd(title, token);
                }
                continue;
            }

            /* comments and similar nodes are handled by insertMisc */
            if (Node.insertMisc(title, token))
            {
                continue;
            }

            if (token.tag != null)
            {
                /* a known tag that does not belong here: close title implicitly
                   and hand the token back to the lexer */
                Report.warning(lexer, title, token, Report.MISSING_ENDTAG_BEFORE);
                lexer.ungetToken();
                Node.trimSpaces(lexer, title);
                return;
            }

            /* unknown tags are reported and thrown away */
            Report.warning(lexer, title, token, Report.DISCARDING_UNEXPECTED);
        }

        /* ran out of tokens without seeing the end tag (token is null here) */
        Report.warning(lexer, title, token, Report.MISSING_ENDTAG_FOR);
    }
}
/**
 * Parser for the content of a script element: the content is fetched from
 * the lexer as a single CDATA node.
 */
public static class ParseScript implements Parser {

    /**
     * Appends the CDATA content returned by the lexer, if any, to the script node.
     *
     * @param lexer  the lexer supplying the CDATA content
     * @param script the script node being parsed
     * @param mode   not consulted by this parser
     */
    public void parse( Lexer lexer, Node script, short mode )
    {
        /*
          Known limitation, carried over from the original note: this is
          not quite right for CDATA content because tags inside the
          content are recognised and parsed accordingly. Scripts that
          include < + letter, < + !, < + ? or < + / + letter are
          unfortunately affected.
        */
        final Node cdata = lexer.getCDATA(script);
        if (cdata == null)
        {
            return;
        }
        Node.insertNodeAtEnd(script, cdata);
    }
}
public static class ParseBody implements Parser {
/*
  Parses the content of a body node.

  Tokens are pulled from the lexer one at a time and either inserted
  under body, handed to a nested parser, moved elsewhere, pushed back
  for the caller, or discarded with a warning.

  lexer - token source; its seenBodyEndTag, versions and excludeBlocks
          fields are updated here
  body  - the body node being filled
  mode  - the incoming value is ignored: it is overwritten with
          Lexer.IgnoreWhitespace before the first token is read and
          then used as the running lexer mode

  The loop ends when the lexer returns null, when a break is taken for
  a body nested in noframes, or via the early return further down that
  pushes an unplaceable token back to the caller.
*/
public void parse( Lexer lexer, Node body, short mode )
{
Node node;
boolean checkstack, iswhitenode;
/* the caller-supplied mode is replaced; body starts out ignoring whitespace */
mode = Lexer.IgnoreWhitespace;
/* checkstack gates the one-shot lexer.inlineDup() calls below */
checkstack = true;
while (true)
{
node = lexer.getToken(mode);
if (node == null) break;
/* </body>: mark closed and remember it, but keep reading tokens unless
   this body sits inside a noframes element */
if (node.tag == body.tag && node.type == Node.EndTag)
{
body.closed = true;
Node.trimSpaces(lexer, body);
lexer.seenBodyEndTag = 1;
mode = Lexer.IgnoreWhitespace;
if (body.parent.tag == TagTable.tagNoframes)
break;
continue;
}
if (node.tag == TagTable.tagNoframes)
{
/* <noframes> inside body is inserted and parsed with the block parser */
if (node.type == Node.StartTag)
{
Node.insertNodeAtEnd(body, node);
getParseBlock().parse(lexer, node, mode);
continue;
}
/* </noframes> while body is a child of noframes: finish body and
   push the end tag back to the lexer */
if (node.type == Node.EndTag &&
body.parent.tag == TagTable.tagNoframes)
{
Node.trimSpaces(lexer, body);
lexer.ungetToken();
break;
}
/* any other noframes token falls through to the general handling below */
}
/* frame/frameset tokens end a body that is nested in noframes; the token
   is pushed back to the lexer */
if ((node.tag == TagTable.tagFrame || node.tag == TagTable.tagFrameset)
&& body.parent.tag == TagTable.tagNoframes)
{
Node.trimSpaces(lexer, body);
lexer.ungetToken();
break;
}
/* html tags are always dropped here; only start tags produce a warning */
if (node.tag == TagTable.tagHtml)
{
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/* a "white node" is a text node at most one character long whose first
   character is a space */
/* NOTE(review): when node.end == node.start this still reads
   textarray[node.start] - confirm that index is always valid */
iswhitenode = false;
if (node.type == Node.TextNode &&
node.end <= node.start + 1 &&
node.textarray[node.start] == (byte)' ')
iswhitenode = true;
/* deal with comments etc. */
if (Node.insertMisc(body, node))
continue;
/* first non-white content after </body>: warn, and bump the counter past 1
   so the warning is not repeated until another </body> resets it */
if (lexer.seenBodyEndTag == 1 && !iswhitenode)
{
++lexer.seenBodyEndTag;
Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
}
/* mixed content model permits text */
if (node.type == Node.TextNode)
{
/* a lone space is dropped while whitespace is being ignored */
if (iswhitenode && mode == Lexer.IgnoreWhitespace)
{
continue;
}
/* with EncloseBodyText configured, non-white text is pushed back and
   re-read as the content of an inferred <p> */
if (lexer.configuration.EncloseBodyText && !iswhitenode)
{
Node para;
lexer.ungetToken();
para = lexer.inferredTag("p");
Node.insertNodeAtEnd(body, para);
parseTag(lexer, para, mode);
mode = Lexer.MixedContent;
continue;
}
else /* strict doesn't allow text here */
lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
/* one-shot check: if inlineDup reports a positive count the text token is
   not inserted on this pass (presumably the lexer re-delivers it after
   the duplicated inline tags - confirm against Lexer.inlineDup) */
if (checkstack)
{
checkstack = false;
if (lexer.inlineDup( node) > 0)
continue;
}
Node.insertNodeAtEnd(body, node);
mode = Lexer.MixedContent;
continue;
}
/* a doctype found in body is passed to Node.insertDocType */
if (node.type == Node.DocTypeTag)
{
Node.insertDocType(lexer, body, node);
continue;
}
/* discard unknown and PARAM tags */
if (node.tag == null || node.tag == TagTable.tagParam)
{
Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/*
Netscape allows LI and DD directly in BODY
We infer UL or DL respectively and use this
boolean to exclude block-level elements so as
to match Netscape's observed behaviour.
*/
lexer.excludeBlocks = false;
/* tag is neither block-level nor inline: it cannot be a direct child
   of body as it stands */
if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
!((node.tag.model & Dict.CM_INLINE) != 0))
{
/* avoid this error message being issued twice */
if (!((node.tag.model & Dict.CM_HEAD) != 0))
Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
/* CM_HTML tags are dropped; a body tag first donates its attributes
   to an inferred, attribute-less body */
if ((node.tag.model & Dict.CM_HTML) != 0)
{
/* copy body attributes if current body was inferred */
if (node.tag == TagTable.tagBody && body.implicit
&& body.attributes == null)
{
body.attributes = node.attributes;
node.attributes = null;
}
continue;
}
/* head-only elements are handed to moveToHead */
if ((node.tag.model & Dict.CM_HEAD) != 0)
{
moveToHead(lexer, body, node);
continue;
}
/* list item: push it back and continue with an inferred <ul class="noindent">
   in its place */
if ((node.tag.model & Dict.CM_LIST) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "ul");
Node.addClass(node, "noindent");
lexer.excludeBlocks = true;
}
/* definition-list item: same treatment with an inferred <dl> */
else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "dl");
lexer.excludeBlocks = true;
}
/* table parts (table, row-group or row content models): same treatment
   with an inferred <table> */
else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
{
lexer.ungetToken();
node = lexer.inferredTag( "table");
lexer.excludeBlocks = true;
}
else
{
/* AQ: The following line is from the official C
version of tidy. It doesn't make sense to me
because the '!' operator has higher precedence
than the '&' operator. It seems to me that the
expression always evaluates to 0.
if (!node->tag->model & (CM_ROW | CM_FIELD))
AQ: 13Jan2000 fixed in C tidy
*/
/* anything that is not a row or field element is pushed back and
   parsing of body stops here; note this path returns without the
   Node.trimSpaces call made on the break paths above */
if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
{
lexer.ungetToken();
return;
}
/* ignore </td> </th> <option> etc. */
continue;
}
}
/* end tags that reach this point get special-case repairs */
if (node.type == Node.EndTag)
{
/* </br> is treated as <br> */
if (node.tag == TagTable.tagBr)
node.type = Node.StartTag;
/* </p> is coerced to a br node and inserted, then a second, inferred
   br replaces it as the current node (presumably yielding two line
   breaks once the start-tag branch below inserts it - confirm the
   type of nodes returned by inferredTag) */
else if (node.tag == TagTable.tagP)
{
Node.coerceNode(lexer, node, TagTable.tagBr);
Node.insertNodeAtEnd(body, node);
node = lexer.inferredTag("br");
}
/* stray inline end tag: passed to lexer.popInline; the node itself then
   falls through to the discard warning at the bottom of the loop */
else if ((node.tag.model & Dict.CM_INLINE) != 0)
lexer.popInline(node);
}
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
{
/* inline, non-mixed element directly in body */
if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
{
/* HTML4 strict doesn't allow inline content here */
/* but HTML2 does allow img elements as children of body */
if (node.tag == TagTable.tagImg)
lexer.versions &= ~Dict.VERS_HTML40_STRICT;
else
lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
/* same one-shot inlineDup check as for text, skipped for inferred nodes */
if (checkstack && !node.implicit)
{
checkstack = false;
if (lexer.inlineDup( node) > 0)
continue;
}
mode = Lexer.MixedContent;
}
else
{
/* any other element re-arms the inlineDup check and returns the
   lexer to ignoring whitespace */
checkstack = true;
mode = Lexer.IgnoreWhitespace;
}
/* report nodes that were inferred rather than present in the input */
if (node.implicit)
Report.warning(lexer, body, node, Report.INSERTING_TAG);
/* attach the element and parse its content via parseTag */
Node.insertNodeAtEnd(body, node);
parseTag(lexer, node, mode);
continue;
}
/* discard unexpected tags */
Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -