📄 parserimpl.java

📁 windows 代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 * @(#)ParserImpl.java   1.11 2000/08/16
 *
 */

package org.w3c.tidy;

/**
 *
 * HTML Parser implementation
 *
 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
 * See Tidy.java for the copyright notice.
 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
 * HTML Tidy Release 4 Aug 2000</a>
 *
 * @author  Dave Raggett &lt;dsr@w3.org&gt;
 * @author  Andy Quick &lt;ac.quick@sympatico.ca&gt; (translation to Java)
 * @version 1.0, 1999/05/22
 * @version 1.0.1, 1999/05/29
 * @version 1.1, 1999/06/18 Java Bean
 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
 * @version 1.4, 1999/09/04 DOM support
 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
 */

public class ParserImpl {

    //private static int SeenBodyEndTag;  /* AQ: moved into lexer structure */

    /**
     * Hands an element over to the parser registered on its tag, after
     * adjusting the lexer's whitespace flags according to the tag's model.
     *
     * <ul>
     * <li>If the tag's model has <code>Dict.CM_EMPTY</code> set,
     *     <code>lexer.waswhite</code> is cleared and nothing is parsed.</li>
     * <li>Otherwise, if the model lacks <code>Dict.CM_INLINE</code>,
     *     <code>lexer.insertspace</code> is cleared.</li>
     * <li>The tag's parser is then invoked, unless the tag has no parser or
     *     the node is a <code>Node.StartEndTag</code>.</li>
     * </ul>
     *
     * @param lexer the lexer whose flags are updated and which is passed to the parser
     * @param node  the element to parse; its <code>tag</code> is dereferenced
     *              without a null check
     * @param mode  passed unchanged to the tag's parser
     */
    private static void parseTag(Lexer lexer, Node node, short mode)
    {
        /* CM_EMPTY elements: reset the flag and stop, there is nothing to parse */
        if ((node.tag.model & Dict.CM_EMPTY) != 0)
        {
            lexer.waswhite = false;
            return;
        }

        /* anything that is not inline cancels a pending inserted space */
        if ((node.tag.model & Dict.CM_INLINE) == 0)
        {
            lexer.insertspace = false;
        }

        /* only delegate when a parser exists and the node is not a start-end tag */
        if (node.tag.parser != null && node.type != Node.StartEndTag)
        {
            node.tag.parser.parse(lexer, node, mode);
        }
    }

    /**
     * Moves an element that was encountered outside the head into the
     * document's head element.
     *
     * For a start tag or start-end tag, a <code>TAG_NOT_ALLOWED_IN</code>
     * warning is reported against <code>element</code>, the ancestor chain of
     * <code>element</code> is followed up to the node whose tag is
     * <code>TagTable.tagHtml</code>, and <code>node</code> is appended to the
     * first child of that node whose tag is <code>TagTable.tagHead</code>.
     * The node is then parsed with <code>Lexer.IgnoreWhitespace</code> when
     * its tag has a parser. Any other node type is only reported with
     * <code>DISCARDING_UNEXPECTED</code>.
     *
     * @param lexer   the lexer, used for reporting and for parsing the moved node
     * @param element the element in which <code>node</code> was found; it or
     *                one of its ancestors is expected to be the html element
     *                (the upward walk has no null check)
     * @param node    the node to move
     */
    private static void moveToHead(Lexer lexer, Node element, Node node)
    {
        /* anything that is not an opening tag is reported and left alone */
        if (node.type != Node.StartTag && node.type != Node.StartEndTag)
        {
            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
            return;
        }

        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

        /* climb to the html element */
        Node root = element;
        while (root.tag != TagTable.tagHtml)
        {
            root = root.parent;
        }

        /* append to the first head child, if there is one */
        Node child = root.content;
        while (child != null)
        {
            if (child.tag == TagTable.tagHead)
            {
                Node.insertNodeAtEnd(child, node);
                break;
            }
            child = child.next;
        }

        if (node.tag.parser != null)
        {
            parseTag(lexer, node, Lexer.IgnoreWhitespace);
        }
    }

    /**
     * Parser for the content of the root <code>html</code> element.
     *
     * It first locates (or infers) the head element and parses it, then
     * routes the remaining tokens to a body, a frameset, or a noframes
     * element nested in the frameset.
     */
    public static class ParseHTML implements Parser {

        /**
         * Parses the children of the html element.
         *
         * Phase 1 reads tokens until a head element is seen; if the input
         * ends or some other token arrives first, a head is inferred (the
         * offending token is pushed back). The head is appended to
         * <code>html</code> and parsed.
         *
         * Phase 2 reads tokens after the head:
         * <ul>
         * <li>stray html tags are skipped (with a warning unless it is a
         *     start tag or a frameset has been seen);</li>
         * <li>a body start tag ends the loop so the body is inserted and
         *     parsed, unless a frameset exists, in which case the token is
         *     pushed back and parsed inside a (possibly inferred) noframes;</li>
         * <li>a frameset start tag is inserted and parsed; a second one is
         *     reported as <code>DUPLICATE_FRAMESET</code>;</li>
         * <li>a noframes start tag is merged into the frameset's noframes,
         *     or replaced by an inferred body when there is no frameset;</li>
         * <li>start tags whose model has <code>CM_HEAD</code> are moved to
         *     the head;</li>
         * <li>anything else is pushed back and parsed inside noframes (for
         *     frameset documents) or inside an inferred body.</li>
         * </ul>
         * When the input ends in phase 2 and no frameset was seen, an
         * inferred body is inserted so the document always has a body or a
         * frameset.
         *
         * @param lexer the token source; <code>configuration.XmlTags</code> is
         *              set to false and <code>seenBodyEndTag</code> reset to 0
         * @param html  the html element receiving the parsed children
         * @param mode  passed to the head, body, frameset and noframes parsers
         */
        public void parse( Lexer lexer, Node html, short mode )
        {
            Node node, head;
            Node frameset = null;
            Node noframes = null;

            lexer.configuration.XmlTags = false;
            lexer.seenBodyEndTag = 0;

            /* phase 1: find or infer the head element */
            for (;;)
            {
                node = lexer.getToken(Lexer.IgnoreWhitespace);

                if (node == null)
                {
                    /* end of input before any head: infer one */
                    node = lexer.inferredTag("head");
                    break;
                }

                if (node.tag == TagTable.tagHead)
                    break;

                if (node.tag == html.tag && node.type == Node.EndTag)
                {
                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                /* deal with comments etc. */
                if (Node.insertMisc(html, node))
                    continue;

                /* some other token: push it back and infer the head */
                lexer.ungetToken();
                node = lexer.inferredTag("head");
                break;
            }

            head = node;
            Node.insertNodeAtEnd(html, head);
            getParseHead().parse(lexer, head, mode);

            /* phase 2: body / frameset / noframes */
            for (;;)
            {
                node = lexer.getToken(Lexer.IgnoreWhitespace);

                if (node == null)
                {
                    /* end of input: a frameset document needs nothing more */
                    if (frameset != null)
                        return;

                    /*
                      No frameset was seen, so create an empty body.
                      Leaving the loop lets the code below insert it into
                      html and parse it; previously the inferred node was
                      dropped and the document ended up without a body.
                    */
                    node = lexer.inferredTag("body");
                    break;
                }

                /* robustly handle html tags */
                if (node.tag == html.tag)
                {
                    if (node.type != Node.StartTag && frameset == null)
                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);

                    continue;
                }

                /* deal with comments etc. */
                if (Node.insertMisc(html, node))
                    continue;

                /* if frameset document coerce <body> to <noframes> */
                if (node.tag == TagTable.tagBody)
                {
                    if (node.type != Node.StartTag)
                    {
                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset != null)
                    {
                        /* push the body token back so the noframes parser sees it */
                        lexer.ungetToken();

                        if (noframes == null)
                        {
                            noframes = lexer.inferredTag("noframes");
                            Node.insertNodeAtEnd(frameset, noframes);
                            Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
                        }

                        parseTag(lexer, noframes, mode);
                        continue;
                    }

                    break;  /* to parse body */
                }

                /* flag an error if we see more than one frameset */
                if (node.tag == TagTable.tagFrameset)
                {
                    if (node.type != Node.StartTag)
                    {
                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset != null)
                        Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
                    else
                        frameset = node;

                    Node.insertNodeAtEnd(html, node);
                    parseTag(lexer, node, mode);

                    /*
                      see if it includes a noframes element so
                      that we can merge subsequent noframes elements
                      (always scans the first frameset; the last
                      noframes child found wins)
                    */

                    for (Node child = frameset.content; child != null; child = child.next)
                    {
                        if (child.tag == TagTable.tagNoframes)
                            noframes = child;
                    }
                    continue;
                }

                /* if not a frameset document coerce <noframes> to <body> */
                if (node.tag == TagTable.tagNoframes)
                {
                    if (node.type != Node.StartTag)
                    {
                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset == null)
                    {
                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        node = lexer.inferredTag("body");
                        break;
                    }

                    if (noframes == null)
                    {
                        noframes = node;
                        Node.insertNodeAtEnd(frameset, noframes);
                    }

                    parseTag(lexer, noframes, mode);
                    continue;
                }

                /* head-only elements found after the head are moved back into it */
                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
                {
                    if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
                    {
                        moveToHead(lexer, html, node);
                        continue;
                    }
                }

                lexer.ungetToken();

                /* insert other content into noframes element */

                if (frameset != null)
                {
                    if (noframes == null)
                    {
                        noframes = lexer.inferredTag("noframes");
                        Node.insertNodeAtEnd(frameset, noframes);
                    }
                    else
                        Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);

                    parseTag(lexer, noframes, mode);
                    continue;
                }

                node = lexer.inferredTag("body");
                break;
            }

            /* node must be body */

            Node.insertNodeAtEnd(html, node);
            parseTag(lexer, node, mode);
        }

    }

    public static class ParseHead implements Parser {

        public void parse( Lexer lexer, Node head, short mode )
        {
            Node node;
            int HasTitle = 0;
            int HasBase = 0;

            while (true)
            {
                node = lexer.getToken(Lexer.IgnoreWhitespace);
                if (node == null) break;
                if (node.tag == head.tag && node.type == Node.EndTag)
                {
                    head.closed = true;
                    break;
                }

                if (node.type == Node.TextNode)
                {
                    lexer.ungetToken();
                    break;
                }

                /* deal with comments etc. */
                if (Node.insertMisc(head, node))
                    continue;

                if (node.type == Node.DocTypeTag)
                {
                    Node.insertDocType(lexer, head, node);
                    continue;
                }

                /* discard unknown tags */
                if (node.tag == null)
                {
                    Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }
        
                if (!((node.tag.model & Dict.CM_HEAD) != 0))
                {
                    lexer.ungetToken();
                    break;
                }

                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
                {
                    if (node.tag == TagTable.tagTitle)
                    {
                        ++HasTitle;

                        if (HasTitle > 1)
                            Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
                    }
                    else if (node.tag == TagTable.tagBase)
                    {
                        ++HasBase;

                        if (HasBase > 1)
                            Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
                    }
                    else if (node.tag == TagTable.tagNoscript)
                        Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);

                    Node.insertNodeAtEnd(head, node);
                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
                    continue;
                }
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -