📄 parserimpl.java
字号:
/*
* @(#)ParserImpl.java 1.11 2000/08/16
*
*/
package org.w3c.tidy;
/**
*
* HTML Parser implementation
*
* (c) 1998-2000 (W3C) MIT, INRIA, Keio University
* See Tidy.java for the copyright notice.
* Derived from <a href="http://www.w3.org/People/Raggett/tidy">
* HTML Tidy Release 4 Aug 2000</a>
*
* @author Dave Raggett &lt;dsr@w3.org&gt;
* @author Andy Quick &lt;ac.quick@sympatico.ca&gt; (translation to Java)
* @version 1.0, 1999/05/22
* @version 1.0.1, 1999/05/29
* @version 1.1, 1999/06/18 Java Bean
* @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
* @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
* @version 1.4, 1999/09/04 DOM support
* @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
* @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
* @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
* @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
* @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
* @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
* @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
*/
public class ParserImpl {
//private static int SeenBodyEndTag; /* AQ: moved into lexer structure */
/**
 * Hands an element over to the content parser registered for its tag.
 *
 * For tags whose model has CM_EMPTY set, only the lexer's
 * <code>waswhite</code> flag is cleared and nothing is parsed. For tags
 * without CM_INLINE the lexer's <code>insertspace</code> flag is cleared
 * before parsing. Nodes of type StartEndTag, and tags with no registered
 * parser, are left without any content parsing.
 *
 * @param lexer the lexer supplying tokens and holding whitespace state
 * @param node  the element to parse; its <code>tag</code> is dereferenced
 *              unconditionally, so it must not be null
 * @param mode  whitespace handling mode passed through to the tag's parser
 */
private static void parseTag(Lexer lexer, Node node, short mode)
{
    /* empty elements carry no content: reset the flag and stop here */
    if ((node.tag.model & Dict.CM_EMPTY) != 0)
    {
        lexer.waswhite = false;
        return;
    }

    /* anything that is not inline cancels a pending inserted space */
    if ((node.tag.model & Dict.CM_INLINE) == 0)
    {
        lexer.insertspace = false;
    }

    /* only delegate when there is a parser and the tag is not self-closed */
    if (node.type != Node.StartEndTag && node.tag.parser != null)
    {
        node.tag.parser.parse(lexer, node, mode);
    }
}
/**
 * Relocates an element found in the wrong place into the document head.
 *
 * Start tags and start-end tags are reported with TAG_NOT_ALLOWED_IN,
 * appended to the first head child of the enclosing html element (when
 * one exists) and then parsed if their tag has a parser. Any other node
 * type is only reported with DISCARDING_UNEXPECTED.
 *
 * @param lexer   the lexer used for reporting and further parsing
 * @param element the element in which <code>node</code> was encountered;
 *                the walk up the parent chain assumes it is the html
 *                element or one of its descendants
 * @param node    the misplaced node
 */
private static void moveToHead(Lexer lexer, Node element, Node node)
{
    boolean isElement = node.type == Node.StartTag || node.type == Node.StartEndTag;
    if (!isElement)
    {
        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
        return;
    }

    Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

    /* climb the parent chain until the html element is reached */
    Node root = element;
    while (root.tag != TagTable.tagHtml)
    {
        root = root.parent;
    }

    /* look for the first head element among html's children */
    Node candidate = root.content;
    while (candidate != null && candidate.tag != TagTable.tagHead)
    {
        candidate = candidate.next;
    }
    if (candidate != null)
    {
        Node.insertNodeAtEnd(candidate, node);
    }

    if (node.tag.parser != null)
    {
        parseTag(lexer, node, Lexer.IgnoreWhitespace);
    }
}
/**
 * Parser for the content of the html element.
 *
 * It first finds (or infers) the head element and parses it, then scans
 * the remaining tokens for a body or frameset. In a frameset document,
 * body and other stray content are routed into a noframes element; in a
 * non-frameset document, noframes and other content lead to an inferred
 * body element.
 */
public static class ParseHTML implements Parser {

    /**
     * Parses the children of the html element.
     *
     * @param lexer the lexer supplying tokens; its
     *              <code>configuration.XmlTags</code> is set to false and
     *              its <code>seenBodyEndTag</code> is reset to 0
     * @param html  the html element that receives head, body or frameset
     * @param mode  whitespace handling mode passed on to the nested parsers
     */
    public void parse( Lexer lexer, Node html, short mode )
    {
        Node node, head;
        Node frameset = null;
        Node noframes = null;

        lexer.configuration.XmlTags = false;
        lexer.seenBodyEndTag = 0;

        /* phase 1: locate the head element, inferring one if necessary */
        for (;;)
        {
            node = lexer.getToken(Lexer.IgnoreWhitespace);

            if (node == null)
            {
                node = lexer.inferredTag("head");
                break;
            }

            if (node.tag == TagTable.tagHead)
                break;

            /* a stray end tag for html before the head is dropped */
            if (node.tag == html.tag && node.type == Node.EndTag)
            {
                Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            /* deal with comments etc. */
            if (Node.insertMisc(html, node))
                continue;

            /* anything else belongs after an implied head */
            lexer.ungetToken();
            node = lexer.inferredTag("head");
            break;
        }

        head = node;
        Node.insertNodeAtEnd(html, head);
        getParseHead().parse(lexer, head, mode);

        /* phase 2: find the body or frameset */
        for (;;)
        {
            node = lexer.getToken(Lexer.IgnoreWhitespace);

            if (node == null)
            {
                /* frameset document: nothing further to add at end of input */
                if (frameset != null)
                    return;

                /*
                  No body was seen before the end of input. Leave the loop
                  so that the implied, empty body is actually attached to
                  html below; returning here would drop the inferred node.
                */
                node = lexer.inferredTag("body");
                break;
            }

            /* robustly handle html tags */
            if (node.tag == html.tag)
            {
                if (node.type != Node.StartTag && frameset == null)
                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);

                continue;
            }

            /* deal with comments etc. */
            if (Node.insertMisc(html, node))
                continue;

            /* if frameset document coerce <body> to <noframes> */
            if (node.tag == TagTable.tagBody)
            {
                if (node.type != Node.StartTag)
                {
                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (frameset != null)
                {
                    lexer.ungetToken();

                    if (noframes == null)
                    {
                        noframes = lexer.inferredTag("noframes");
                        Node.insertNodeAtEnd(frameset, noframes);
                        Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
                    }

                    parseTag(lexer, noframes, mode);
                    continue;
                }

                break; /* to parse body */
            }

            /* flag an error if we see more than one frameset */
            if (node.tag == TagTable.tagFrameset)
            {
                if (node.type != Node.StartTag)
                {
                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (frameset != null)
                    Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
                else
                    frameset = node;

                Node.insertNodeAtEnd(html, node);
                parseTag(lexer, node, mode);

                /*
                  see if it includes a noframes element so
                  that we can merge subsequent noframes elements
                */
                for (node = frameset.content; node != null; node = node.next)
                {
                    if (node.tag == TagTable.tagNoframes)
                        noframes = node;
                }

                continue;
            }

            /* if not a frameset document coerce <noframes> to <body> */
            if (node.tag == TagTable.tagNoframes)
            {
                if (node.type != Node.StartTag)
                {
                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (frameset == null)
                {
                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    node = lexer.inferredTag("body");
                    break;
                }

                if (noframes == null)
                {
                    noframes = node;
                    Node.insertNodeAtEnd(frameset, noframes);
                }

                parseTag(lexer, noframes, mode);
                continue;
            }

            /* elements with CM_HEAD in their model are moved back into the head */
            if (node.type == Node.StartTag || node.type == Node.StartEndTag)
            {
                if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
                {
                    moveToHead(lexer, html, node);
                    continue;
                }
            }

            /* push the token back so the body/noframes parser sees it */
            lexer.ungetToken();

            /* insert other content into noframes element */
            if (frameset != null)
            {
                if (noframes == null)
                {
                    noframes = lexer.inferredTag("noframes");
                    Node.insertNodeAtEnd(frameset, noframes);
                }
                else
                    Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);

                parseTag(lexer, noframes, mode);
                continue;
            }

            node = lexer.inferredTag("body");
            break;
        }

        /* node must be body */
        Node.insertNodeAtEnd(html, node);
        parseTag(lexer, node, mode);
    }
}
public static class ParseHead implements Parser {
public void parse( Lexer lexer, Node head, short mode )
{
Node node;
int HasTitle = 0;
int HasBase = 0;
while (true)
{
node = lexer.getToken(Lexer.IgnoreWhitespace);
if (node == null) break;
if (node.tag == head.tag && node.type == Node.EndTag)
{
head.closed = true;
break;
}
if (node.type == Node.TextNode)
{
lexer.ungetToken();
break;
}
/* deal with comments etc. */
if (Node.insertMisc(head, node))
continue;
if (node.type == Node.DocTypeTag)
{
Node.insertDocType(lexer, head, node);
continue;
}
/* discard unknown tags */
if (node.tag == null)
{
Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
continue;
}
if (!((node.tag.model & Dict.CM_HEAD) != 0))
{
lexer.ungetToken();
break;
}
if (node.type == Node.StartTag || node.type == Node.StartEndTag)
{
if (node.tag == TagTable.tagTitle)
{
++HasTitle;
if (HasTitle > 1)
Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
}
else if (node.tag == TagTable.tagBase)
{
++HasBase;
if (HasBase > 1)
Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
}
else if (node.tag == TagTable.tagNoscript)
Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
Node.insertNodeAtEnd(head, node);
parseTag(lexer, node, Lexer.IgnoreWhitespace);
continue;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -