⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexer.java

📁 windows 代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
            else /* naked & */
            {
                Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
            }
        }
        else
        {
            if (c != ';')    /* issue warning if not terminated by ';' */
            {
                /* set error position just before offending character */
                this.lines = this.in.curline;
                this.columns = startcol;
                Report.entityError(this, Report.MISSING_SEMICOLON, str, c);
            }

            this.lexsize = start;

            if (ch == 160 && (mode & Preformatted) != 0)
                ch = ' ';

            addCharToLexer(ch);

            if (ch == '&' && !this.configuration.QuoteAmpersand)
            {
                addCharToLexer('a');
                addCharToLexer('m');
                addCharToLexer('p');
                addCharToLexer(';');
            }
        }
    }

    /*
     * Reads the remainder of a tag name from the input stream into the
     * lexer buffer.  The first character of the name is taken from
     * lexbuf[txtstart] rather than from the stream.  Unless the
     * configuration's XmlTags flag is set, every character that MAP
     * flags as UPPERCASE is shifted by ('a' - 'A').
     *
     * On exit txtend is set to the current lexsize.  The return value is
     * the character that stopped the scan: the first one MAP does not
     * flag as NAMECHAR, or StreamIn.EndOfStream narrowed to char.
     */
    public char parseTagName()
    {
        final int caseShift = (int)'a' - (int)'A';
        int ch = this.lexbuf[this.txtstart];
        short flags = MAP((char)ch);

        /* the leading character is already buffered, so fold it in place */
        if (!this.configuration.XmlTags && (flags & UPPERCASE) != 0)
        {
            ch += caseShift;
            this.lexbuf[this.txtstart] = (byte)ch;
        }

        /* append name characters until the stream ends or the name does */
        for (ch = this.in.readChar();
                ch != StreamIn.EndOfStream;
                ch = this.in.readChar())
        {
            flags = MAP((char)ch);

            if ((flags & NAMECHAR) == 0)
                break;

            if (!this.configuration.XmlTags && (flags & UPPERCASE) != 0)
                ch += caseShift;

            addCharToLexer(ch);
        }

        this.txtend = this.lexsize;
        return (char)ch;
    }

    /*
     * Appends str to the lexer buffer by passing each of its characters,
     * in order, to addCharToLexer.
     */
    public void addStringLiteral(String str)
    {
        final int count = str.length();
        int pos = 0;

        while (pos < count)
        {
            addCharToLexer( (int)str.charAt(pos) );
            ++pos;
        }
    }

    /*
     * Choose what version to use for a new doctype.
     *
     * The version bits held in this.versions are tested against a fixed
     * list of candidates; the first candidate whose bit is set is
     * returned, and Dict.VERS_UNKNOWN is returned when none is set.
     */
    public short HTMLVersion()
    {
        /* candidates in order of preference: the earliest match wins */
        final short[] preference = {
            Dict.VERS_HTML20,
            Dict.VERS_HTML32,
            Dict.VERS_HTML40_STRICT,
            Dict.VERS_HTML40_LOOSE,
            Dict.VERS_FRAMES
        };
        final short seen = this.versions;

        for (int k = 0; k < preference.length; ++k)
        {
            if ((seen & preference[k]) != 0)
                return preference[k];
        }

        return Dict.VERS_UNKNOWN;
    }

    /*
     * Returns the display name for the version reported by
     * apparentVersion().  The W3CVersion table is searched for an entry
     * with that code; its voyagerName is returned when this.isvoyager is
     * set, otherwise its name.  Returns null if no entry matches.
     */
    public String HTMLVersionName()
    {
        final short apparent = apparentVersion();
        int index = 0;

        while (index < W3CVersion.length)
        {
            if (W3CVersion[index].code != apparent)
            {
                ++index;
                continue;
            }

            return this.isvoyager ? W3CVersion[index].voyagerName
                                  : W3CVersion[index].name;
        }

        return null;
    }

    /*
     * Add meta element for Tidy.
     *
     * Looks up the HEAD element under root.  If HEAD already contains a
     * meta element whose "name" attribute matches "generator" and whose
     * "content" attribute starts with "HTML Tidy" (both compared with
     * wstrcasecmp), nothing is changed.  Otherwise a new meta element
     * is inserted at the start of HEAD.
     *
     * Returns true only when a new element was inserted; false when
     * there is no HEAD or a Tidy generator element is already present.
     */
    public boolean addGenerator(Node root)
    {
        Node head = Node.findHEAD(root);

        if (head == null)
            return false;

        for (Node child = head.content; child != null; child = child.next)
        {
            if (child.tag != TagTable.tagMeta)
                continue;

            AttVal nameAttr = child.getAttrByName("name");

            if (nameAttr == null || nameAttr.value == null ||
                Lexer.wstrcasecmp(nameAttr.value, "generator") != 0)
                continue;

            AttVal contentAttr = child.getAttrByName("content");

            /* too short to hold the nine characters of "HTML Tidy" */
            if (contentAttr == null || contentAttr.value == null ||
                contentAttr.value.length() < 9)
                continue;

            if (Lexer.wstrcasecmp(contentAttr.value.substring(0, 9), "HTML Tidy") == 0)
                return false;
        }

        Node meta = this.inferredTag("meta");
        meta.addAttribute("content", "HTML Tidy, see www.w3.org");
        meta.addAttribute("name", "generator");
        Node.insertNodeAtStart(head, meta);
        return true;
    }

    /*
     * Return true if substring s is in p and isn't spelled exactly as s.
     * This is used to check the case of SYSTEM, PUBLIC, DTD and EN.
     *
     * s   - the keyword in its required spelling
     * p   - the text to scan
     * len - how many chars to check in p; values larger than p.length()
     *       are clamped, because callers may pass a byte count that
     *       exceeds the length of the decoded string
     *
     * Every window of s.length() chars inside the first len chars of p
     * is compared with s using wstrcasecmp (presumably a case-insensitive
     * comparison).  The first window that compares equal decides the
     * result: true if its spelling differs from s, false if identical.
     * Returns false when no window compares equal.
     */
    private static boolean findBadSubString(String s, String p, int len)
    {
        int n = s.length();
        /* never read past the real end of p, whatever the caller claims */
        int limit = Math.min(len, p.length());
        String ps;

        /* "<=" so the window ending exactly at the limit is tested too */
        for (int i = 0; i + n <= limit; ++i)
        {
            ps = p.substring(i, i + n);
            if (wstrcasecmp(s, ps) == 0)
                return !ps.equals(s);
        }

        return false;
    }

    /*
     * Checks the spelling of the keywords inside a doctype declaration.
     * The text between doctype.start and doctype.end in lexbuf is passed
     * to findBadSubString once per keyword, stopping at the first
     * keyword it reports as bad.
     *
     * Returns false if any keyword is reported bad, true otherwise.
     */
    public boolean checkDocTypeKeyWords(Node doctype)
    {
        final String[] keywords = { "SYSTEM", "PUBLIC", "//DTD", "//W3C", "//EN" };
        int span = doctype.end - doctype.start;
        String text = getString(this.lexbuf, doctype.start, span);

        for (int k = 0; k < keywords.length; ++k)
        {
            if (findBadSubString(keywords[k], text, span))
                return false;
        }

        return true;
    }

    /*
     * Examine <!DOCTYPE> to identify version.
     *
     * Returns the code of the matching W3CVersion entry, or 0 when the
     * doctype is not for "html", gives only a SYSTEM identifier, has no
     * quoted public identifier, or names a version that is not in the
     * table.
     *
     * Side effects: issues a DTYPE_NOT_UPPER_CASE warning when
     * checkDocTypeKeyWords fails, rewrites a wrongly-cased SYSTEM or
     * PUBLIC keyword in lexbuf, and sets badDoctype when neither
     * keyword follows the root name.
     */
    public short findGivenVersion(Node doctype)
    {
        /* if root tag for doctype isn't html give up now */
        String rootName = getString(this.lexbuf, doctype.start, 5);
        if (wstrcasecmp(rootName, "html ") != 0)
            return 0;

        if (!checkDocTypeKeyWords(doctype))
            Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);

        /* the keyword is expected immediately after the 5 chars of "html " */
        final int keywordAt = doctype.start + 5;
        String keyword = getString(this.lexbuf, keywordAt, 7);

        /* give up if all we are given is the system id for the doctype */
        if (wstrcasecmp(keyword, "SYSTEM ") == 0)
        {
            /* but at least ensure the case is correct */
            if (!keyword.substring(0, 6).equals("SYSTEM"))
                System.arraycopy( getBytes("SYSTEM"), 0,
                                  this.lexbuf, keywordAt, 6 );
            return 0;  /* unrecognized */
        }

        if (wstrcasecmp(keyword, "PUBLIC ") != 0)
            this.badDoctype = true;
        else if (!keyword.substring(0, 6).equals("PUBLIC"))
            System.arraycopy( getBytes("PUBLIC "), 0,
                              this.lexbuf, keywordAt, 6 );

        /* only the first double quote inside the doctype is considered */
        int quote = doctype.start;
        while (quote < doctype.end && this.lexbuf[quote] != (byte)'"')
            ++quote;

        if (quote >= doctype.end)
            return 0;

        String w3cPrefix = getString( this.lexbuf, quote + 1, 12 );
        String ietfPrefix = getString( this.lexbuf, quote + 1, 13 );

        if (w3cPrefix.equals("-//W3C//DTD "))
        {
            /* the identifier, e.g. "HTML 4.0 Transitional", runs to the next '/' */
            int idStart = quote + 13;
            int idEnd = idStart;
            while (idEnd < doctype.end && this.lexbuf[idEnd] != (byte)'/')
                ++idEnd;

            int idLen = idEnd - idStart;
            String id = getString( this.lexbuf, idStart, idLen );

            /* entry 0 is skipped here; it is only matched for the IETF prefix */
            for (int k = 1; k < W3CVersion.length; ++k)
            {
                String known = W3CVersion[k].name;
                if (idLen == known.length() && known.equals(id))
                    return W3CVersion[k].code;
            }

            /* else unrecognized version */
        }
        else if (ietfPrefix.equals("-//IETF//DTD "))
        {
            /* the identifier, e.g. "HTML 2.0", runs to the next '/' */
            int idStart = quote + 14;
            int idEnd = idStart;
            while (idEnd < doctype.end && this.lexbuf[idEnd] != (byte)'/')
                ++idEnd;

            int idLen = idEnd - idStart;
            String id = getString( this.lexbuf, idStart, idLen );
            String known = W3CVersion[0].name;

            if (idLen == known.length() && known.equals(id))
                return W3CVersion[0].code;

            /* else unrecognized version */
        }

        return 0;
    }

    /*
     * Makes sure the html element under root carries an xmlns attribute
     * equal to profile.
     *
     * root    - node whose content list is searched for the html element
     * profile - the namespace value the xmlns attribute should have
     *
     * If an xmlns attribute exists with a different (or missing) value,
     * an INCONSISTENT_NAMESPACE warning is reported and the value is
     * replaced.  If there is no xmlns attribute, one is created and
     * placed at the head of the attribute list.  Nothing happens when
     * no html element is found.
     */
    public void fixHTMLNameSpace(Node root, String profile)
    {
        Node node;
        AttVal attr;

        for (node = root.content;
                node != null && node.tag != TagTable.tagHtml; node = node.next);

        if (node == null)
            return;

        for (attr = node.attributes; attr != null; attr = attr.next)
        {
            /* constant-first comparison tolerates a null attribute name */
            if ("xmlns".equals(attr.attribute))
                break;
        }

        if (attr != null)
        {
            /* a valueless xmlns attribute counts as inconsistent, not as a crash */
            if (attr.value == null || !attr.value.equals(profile))
            {
                Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
                attr.value = profile;
            }
        }
        else
        {
            /* new attribute is linked in front of the existing list */
            attr = new AttVal( node.attributes, null, (int)'"',
                               "xmlns", profile );
            attr.dict =
                AttributeTable.getDefaultAttributeTable().findAttribute( attr );
            node.attributes = attr;
        }
    }

    public boolean setXHTMLDocType(Node root)
    {
        String fpi = " ";
        String sysid = "";
        String namespace = XHTML_NAMESPACE;
        Node doctype;

        doctype = root.findDocType();

        if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
        {
            if (doctype != null)
                Node.discardElement(doctype);
            return true;
        }

        if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
        {
            /* see what flavor of XHTML this document matches */
            if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
            {  /* use XHTML strict */
                fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
                sysid = voyager_strict;
            }
            else if ((this.versions & Dict.VERS_LOOSE) != 0)
            {
                fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
                sysid = voyager_loose;
            }
            else if ((this.versions & Dict.VERS_FRAMES) != 0)
            {   /* use XHTML frames */
                fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
                sysid = voyager_frameset;
            }
            else /* lets assume XHTML transitional */
            {
                fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
                sysid = voyager_loose;
            }
        }
        else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
        {
            fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
            sysid = voyager_strict;
        }
        else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
        {
            fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
            sysid = voyager_loose;
        }

        fixHTMLNameSpace(root, namespace);

        if (doctype == null)
        {
            doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
            doctype.next = root.content;
            doctype.parent = root;
            doctype.prev = null;
            root.content = doctype;
        }

        if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
            configuration.docTypeStr != null)
        {
            fpi = configuration.docTypeStr;
            sysid = "";
        }

        this.txtstart = this.lexsize;
        this.txtend = this.lexsize;

        /* add public identifier */
        addStringLiteral("html PUBLIC ");

        /* check if the fpi is quoted or not */
        if (fpi.charAt(0) == '"')
            addStringLiteral(fpi);
        else
        {
            addStringLiteral("\"");
            addStringLiteral(fpi);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -