📄 parserimpl.java

📁 windows 代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
                /* discard unexpected text nodes and end tags */
                Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
            }

            if (HasTitle == 0)
            {
                Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
                Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
            }
        }

    };

    /**
     * Parser for the content of a TITLE element.
     *
     * Tokens are read in mixed-content mode. Non-empty text nodes are
     * appended to the title; the matching end tag closes it. Any other
     * recognised tag is pushed back to the lexer and ends the title with a
     * "missing end tag" warning.
     */
    public static class ParseTitle implements Parser {

        public void parse( Lexer lexer, Node title, short mode )
        {
            Node token = lexer.getToken(Lexer.MixedContent);

            for (; token != null; token = lexer.getToken(Lexer.MixedContent))
            {
                /* the matching end tag finishes the element normally */
                final boolean closesTitle =
                    token.type == Node.EndTag && token.tag == title.tag;

                if (closesTitle)
                {
                    title.closed = true;
                    Node.trimSpaces(lexer, title);
                    return;
                }

                if (token.type == Node.TextNode)
                {
                    /* leading space is only trimmed while the title is still empty */
                    if (title.content == null)
                    {
                        Node.trimInitialSpace(lexer, title, token);
                    }

                    /* text that is empty after trimming is dropped */
                    if (token.start < token.end)
                    {
                        Node.insertNodeAtEnd(title, token);
                    }

                    continue;
                }

                /* comments and similar nodes are handled by insertMisc */
                if (Node.insertMisc(title, token))
                {
                    continue;
                }

                if (token.tag != null)
                {
                    /* a known tag that does not belong here: hand it back and stop */
                    Report.warning(lexer, title, token, Report.MISSING_ENDTAG_BEFORE);
                    lexer.ungetToken();
                    Node.trimSpaces(lexer, title);
                    return;
                }

                /* unknown tags are thrown away */
                Report.warning(lexer, title, token, Report.DISCARDING_UNEXPECTED);
            }

            /* ran out of tokens before seeing the end tag (token is null here) */
            Report.warning(lexer, title, token, Report.MISSING_ENDTAG_FOR);
        }

    };

    /**
     * Parser for elements whose content is read as a single CDATA node.
     *
     * The content is obtained with <code>lexer.getCDATA</code> and, when one
     * is returned, appended as the last child of <code>script</code>.
     */
    public static class ParseScript implements Parser {

        public void parse( Lexer lexer, Node script, short mode )
        {
            /*
              Known limitation: this is not quite right for CDATA content,
              because tags inside the content are recognised and parsed.
              Scripts containing
              < + letter,  < + !, < + ?  or  < + / + letter
              will therefore be mangled.
            */
            final Node cdata = lexer.getCDATA(script);

            if (cdata == null)
            {
                return;
            }

            Node.insertNodeAtEnd(script, cdata);
        }

    };

    public static class ParseBody implements Parser {

        /**
         * Parses the content of a BODY element, appending accepted nodes to
         * <code>body</code> until the token stream is exhausted or one of the
         * exit conditions inside the loop is reached.
         *
         * @param lexer source of tokens; its <code>seenBodyEndTag</code>,
         *              <code>excludeBlocks</code> and <code>versions</code>
         *              fields are updated here
         * @param body  the BODY node being populated
         * @param mode  ignored on entry: it is immediately overwritten with
         *              <code>Lexer.IgnoreWhitespace</code> and then reused as
         *              the current lexing mode
         */
        public void parse( Lexer lexer, Node body, short mode )
        {
            Node node;
            /*
              checkstack: whether the next text/inline node should trigger a
              call to lexer.inlineDup(); cleared once it has been done and
              set again after a non-inline start tag.
              iswhitenode: whether the current token is a single-space text node.
            */
            boolean checkstack, iswhitenode;

            /* the caller's mode is discarded; body content starts out ignoring whitespace */
            mode = Lexer.IgnoreWhitespace;
            checkstack = true;

            while (true)
            {
                node = lexer.getToken(mode);
                if (node == null) break;

                /*
                  </body>: mark the body closed, but keep reading tokens
                  (anything that follows is still handled by this loop)
                  unless this body sits inside NOFRAMES.
                */
                if (node.tag == body.tag && node.type == Node.EndTag)
                {
                    body.closed = true;
                    Node.trimSpaces(lexer, body);
                    lexer.seenBodyEndTag = 1;
                    mode = Lexer.IgnoreWhitespace;

                    if (body.parent.tag == TagTable.tagNoframes)
                        break;

                    continue;
                }
        
                if (node.tag == TagTable.tagNoframes)
                {
                    /* <noframes> inside body: insert it and parse it as a block */
                    if (node.type == Node.StartTag)
                    {
                        Node.insertNodeAtEnd(body, node);
                        getParseBlock().parse(lexer, node, mode);
                        continue;
                    }

                    /* </noframes> ends a body nested in NOFRAMES; the token is left for the caller */
                    if (node.type == Node.EndTag &&
                        body.parent.tag == TagTable.tagNoframes)
                    {
                        Node.trimSpaces(lexer, body);
                        lexer.ungetToken();
                        break;
                    }
                }

                /* FRAME/FRAMESET also end a body nested in NOFRAMES; the token is pushed back */
                if ((node.tag == TagTable.tagFrame || node.tag == TagTable.tagFrameset)
                    && body.parent.tag == TagTable.tagNoframes)
                {
                    Node.trimSpaces(lexer, body);
                    lexer.ungetToken();
                    break;
                }
        
                /* any HTML tag is dropped here; only start tags produce a warning */
                if (node.tag == TagTable.tagHtml)
                {
                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
                        Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);

                    continue;
                }

                iswhitenode = false;

                /*
                  A text node of at most one byte whose first byte is a space.
                  NOTE(review): when end == start this still reads
                  textarray[start]; presumably the lexer never returns an
                  empty text node here - confirm.
                */
                if (node.type == Node.TextNode &&
                       node.end <= node.start + 1 &&
                       node.textarray[node.start] == (byte)' ')
                    iswhitenode = true;

                /* deal with comments etc. */
                if (Node.insertMisc(body, node))
                    continue;

                /*
                  First non-whitespace token after </body>: warn once.
                  The increment moves the counter past 1 so that the
                  condition is false for later tokens.
                */
                if (lexer.seenBodyEndTag == 1 && !iswhitenode)
                {
                    ++lexer.seenBodyEndTag;
                    Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
                }

                /* mixed content model permits text */
                if (node.type == Node.TextNode)
                {
                    /* a lone space is dropped while whitespace is being ignored */
                    if (iswhitenode && mode == Lexer.IgnoreWhitespace)
                    {
                        continue;
                    }

                    /*
                      With EncloseBodyText set, non-whitespace text is pushed
                      back and re-read as the content of an inferred <p>.
                    */
                    if (lexer.configuration.EncloseBodyText && !iswhitenode)
                    {
                        Node para;
                
                        lexer.ungetToken();
                        para = lexer.inferredTag("p");
                        Node.insertNodeAtEnd(body, para);
                        parseTag(lexer, para, mode);
                        mode = Lexer.MixedContent;
                        continue;
                    }
                    else /* strict doesn't allow text here */
                        lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);

                    /* consult the inline stack once; a positive result skips inserting this node */
                    if (checkstack)
                    {
                        checkstack = false;

                        if (lexer.inlineDup( node) > 0)
                            continue;
                    }

                    Node.insertNodeAtEnd(body, node);
                    mode = Lexer.MixedContent;
                    continue;
                }

                if (node.type == Node.DocTypeTag)
                {
                    Node.insertDocType(lexer, body, node);
                    continue;
                }
                /* discard unknown  and PARAM tags */
                if (node.tag == null || node.tag == TagTable.tagParam)
                {
                    Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                /*
                  Netscape allows LI and DD directly in BODY
                  We infer UL or DL respectively and use this
                  boolean to exclude block-level elements so as
                  to match Netscape's observed behaviour.
                */
                lexer.excludeBlocks = false;
        
                /* the tag is neither block-level nor inline */
                if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
                    !((node.tag.model & Dict.CM_INLINE) != 0))
                {
                    /* avoid this error message being issued twice */
                    if (!((node.tag.model & Dict.CM_HEAD) != 0))
                        Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);

                    /* CM_HTML tags are dropped after the optional attribute copy below */
                    if ((node.tag.model & Dict.CM_HTML) != 0)
                    {
                        /* copy body attributes if current body was inferred */
                        if (node.tag == TagTable.tagBody && body.implicit 
                                            && body.attributes == null)
                        {
                            body.attributes = node.attributes;
                            node.attributes = null;
                        }

                        continue;
                    }

                    /* head-only elements are relocated via moveToHead */
                    if ((node.tag.model & Dict.CM_HEAD) != 0)
                    {
                        moveToHead(lexer, body, node);
                        continue;
                    }

                    /*
                      For list, definition-list and table parts the token is
                      pushed back and node is replaced by an inferred
                      container (ul / dl / table). Control then falls through
                      to the end-tag and start-tag handling below with the
                      inferred node.
                    */
                    if ((node.tag.model & Dict.CM_LIST) != 0)
                    {
                        lexer.ungetToken();
                        node = lexer.inferredTag( "ul");
                        Node.addClass(node, "noindent");
                        lexer.excludeBlocks = true;
                    }
                    else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
                    {
                        lexer.ungetToken();
                        node = lexer.inferredTag( "dl");
                        lexer.excludeBlocks = true;
                    }
                    else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
                    {
                        lexer.ungetToken();
                        node = lexer.inferredTag( "table");
                        lexer.excludeBlocks = true;
                    }
                    else
                    {
                        /* AQ: The following line is from the official C
                           version of tidy.  It doesn't make sense to me
                           because the '!' operator has higher precedence
                           than the '&' operator.  It seems to me that the
                           expression always evaluates to 0.

                           if (!node->tag->model & (CM_ROW | CM_FIELD))

                           AQ: 13Jan2000 fixed in C tidy
                        */
                        /* not a row/field element: push it back and leave body parsing altogether */
                        if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
                        {
                            lexer.ungetToken();
                            return;
                        }

                        /* ignore </td> </th> <option> etc. */
                        continue;
                    }
                }

                if (node.type == Node.EndTag)
                {
                    /* </br> is treated as if it were <br> */
                    if (node.tag == TagTable.tagBr)
                        node.type = Node.StartTag;
                    else if (node.tag == TagTable.tagP)
                    {
                        /*
                          A stray </p> is coerced to BR and inserted, then node
                          is replaced by a second, inferred BR which is handled
                          by the start-tag code below (assuming inferredTag
                          yields a start tag - confirm).
                        */
                        Node.coerceNode(lexer, node, TagTable.tagBr);
                        Node.insertNodeAtEnd(body, node);
                        node = lexer.inferredTag("br");
                    }
                    else if ((node.tag.model & Dict.CM_INLINE) != 0)
                        lexer.popInline(node);
                }

                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
                {
                    /* inline element that is not also flagged CM_MIXED */
                    if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
                    {
                        /* HTML4 strict doesn't allow inline content here */
                        /* but HTML2 does allow img elements as children of body */
                        if (node.tag == TagTable.tagImg)
                            lexer.versions &= ~Dict.VERS_HTML40_STRICT;
                        else
                            lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);

                        /* same one-shot inline-stack check as for text, skipped for implicit nodes */
                        if (checkstack && !node.implicit)
                        {
                            checkstack = false;

                            if (lexer.inlineDup( node) > 0)
                                continue;
                        }

                        mode = Lexer.MixedContent;
                    }
                    else
                    {
                        /* any other element re-arms the check and returns to ignoring whitespace */
                        checkstack = true;
                        mode = Lexer.IgnoreWhitespace;
                    }

                    if (node.implicit)
                        Report.warning(lexer, body, node, Report.INSERTING_TAG);

                    Node.insertNodeAtEnd(body, node);
                    parseTag(lexer, node, mode);
                    continue;
                }

                /* discard unexpected tags */
                Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
            }
        }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -