📄 lexer.java

📁 windows 代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
        int c = 0;
        int lastc;
        int badcomment = 0;
        MutableBoolean isempty = new MutableBoolean();
        AttVal attributes;

        if (this.pushed)
        {
            /* duplicate inlines in preference to pushed text nodes when appropriate */
            if (this.token.type != Node.TextNode ||
                (this.insert == -1 && this.inode == null))
            {
                this.pushed = false;
                return this.token;
            }
        }

        /* at start of block elements, unclosed inline
           elements are inserted into the token stream */
     
        if (this.insert != -1 || this.inode != null)
            return insertedToken();

        this.lines = this.in.curline;
        this.columns = this.in.curcol;
        this.waswhite = false;

        this.txtstart = this.lexsize;
        this.txtend = this.lexsize;

        while (true)
        {
            c = this.in.readChar();
            if (c == StreamIn.EndOfStream) break;
            if (this.insertspace && mode != IgnoreWhitespace)
            {
                addCharToLexer(' ');
                this.waswhite = true;
                this.insertspace = false;
            }

            /* treat \r\n as \n and \r as \n */

            if (c == '\r')
            {
                c = this.in.readChar();

                if (c != '\n')
                    this.in.ungetChar(c);

                c = '\n';
            }

            addCharToLexer(c);

            switch (this.state)
            {
            case LEX_CONTENT:  /* element content */
                map = MAP((char)c);

                /*
                 Discard white space if appropriate. It's cheaper
                 to do this here rather than in parser methods
                 for elements that don't have mixed content.
                */
                if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) 
                      && this.lexsize == this.txtstart + 1)
                {
                    --this.lexsize;
                    this.waswhite = false;
                    this.lines = this.in.curline;
                    this.columns = this.in.curcol;
                    continue;
                }

                if (c == '<')
                {
                    this.state = LEX_GT;
                    continue;
                }

                if ((map & WHITE) != 0)
                {
                    /* was previous char white? */
                    if (this.waswhite)
                    {
                        if (mode != Preformatted && mode != IgnoreMarkup)
                        {
                            --this.lexsize;
                            this.lines = this.in.curline;
                            this.columns = this.in.curcol;
                        }
                    }
                    else /* prev char wasn't white */
                    {
                        this.waswhite = true;
                        lastc = c;

                        if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
                            changeChar((byte)' ');
                    }

                    continue;
                }
                else if (c == '&' && mode != IgnoreMarkup)
                    parseEntity(mode);

                /* this is needed to avoid trimming trailing whitespace */
                if (mode == IgnoreWhitespace)
                    mode = MixedContent;

                this.waswhite = false;
                continue;

            case LEX_GT:  /* < */

                /* check for endtag */
                if (c == '/')
                {
                    c = this.in.readChar();
                    if (c == StreamIn.EndOfStream)
                    {
                        this.in.ungetChar(c);
                        continue;
                    }

                    addCharToLexer(c);
                    map = MAP((char)c);

                    if ((map & LETTER) != 0)
                    {
                        this.lexsize -= 3;
                        this.txtend = this.lexsize;
                        this.in.ungetChar(c);
                        this.state = LEX_ENDTAG;
                        this.lexbuf[this.lexsize] = (byte)'\0';  /* debug */
                        this.in.curcol -= 2;

                        /* if some text before the </ return it now */
                        if (this.txtend > this.txtstart)
                        {
                            /* trim space char before end tag */
                            if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ')
                            {
                                this.lexsize -= 1;
                                this.txtend = this.lexsize;
                            }

                            this.token = newNode(Node.TextNode,
                                                  this.lexbuf,
                                                  this.txtstart,
                                                  this.txtend);
                            return this.token;
                        }

                        continue;       /* no text so keep going */
                    }

                    /* otherwise treat as CDATA */
                    this.waswhite = false;
                    this.state = LEX_CONTENT;
                    continue;
                }

                if (mode == IgnoreMarkup)
                {
                    /* markup is ignored in this mode, so treat the '<' as CDATA */
                    this.waswhite = false;
                    this.state = LEX_CONTENT;
                    continue;
                }

                /*
                   look out for comments, doctype or marked sections
                   this isn't quite right, but it's getting there ...
                */
                if (c == '!')
                {
                    c = this.in.readChar();

                    if (c == '-')
                    {
                        c = this.in.readChar();

                        if (c == '-')
                        {
                            this.state = LEX_COMMENT;  /* comment */
                            this.lexsize -= 2;
                            this.txtend = this.lexsize;

                            /* if some text before < return it now */
                            if (this.txtend > this.txtstart)
                            {
                                this.token = newNode(Node.TextNode,
                                                      this.lexbuf,
                                                      this.txtstart,
                                                      this.txtend);
                                return this.token;
                            }

                            this.txtstart = this.lexsize;
                            continue;
                        }

                        Report.warning(this, null, null, Report.MALFORMED_COMMENT);
                    }
                    else if (c == 'd' || c == 'D')
                    {
                        this.state = LEX_DOCTYPE; /* doctype */
                        this.lexsize -= 2;
                        this.txtend = this.lexsize;
                        mode = IgnoreWhitespace;

                        /* skip until white space or '>' */

                        for (;;)
                        {
                            c = this.in.readChar();

                            if (c == StreamIn.EndOfStream || c == '>')
                            {
                                this.in.ungetChar(c);
                                break;
                            }

                            map = MAP((char)c);

                            if ((map & WHITE) == 0)
                                continue;

                            /* and skip to end of whitespace */

                            for (;;)
                            {
                                c = this.in.readChar();

                                if (c == StreamIn.EndOfStream || c == '>')
                                {
                                    this.in.ungetChar(c);
                                    break;
                                }

                                map = MAP((char)c);

                                if ((map & WHITE) != 0)
                                    continue;

                                this.in.ungetChar(c);
                                    break;
                            }

                            break;
                        }

                        /* if some text before < return it now */
                        if (this.txtend > this.txtstart)
                        {
                                this.token = newNode(Node.TextNode,
                                                      this.lexbuf,
                                                      this.txtstart,
                                                      this.txtend);
                                return this.token;
                        }

                        this.txtstart = this.lexsize;
                        continue;
                    }
                    else if (c == '[')
                    {
                        /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
                        this.lexsize -= 2;
                        this.state = LEX_SECTION;
                        this.txtend = this.lexsize;

                        /* if some text before < return it now */
                        if (this.txtend > this.txtstart)
                        {
                                this.token = newNode(Node.TextNode,
                                                      this.lexbuf,
                                                      this.txtstart,
                                                      this.txtend);
                                return this.token;
                        }

                        this.txtstart = this.lexsize;
                        continue;
                    }

                    /* otherwise swallow chars up to and including next '>' */
                    while (true)
                    {
                        c = this.in.readChar();
                        if (c == '>') break;
                        if (c == -1)
                        {
                            this.in.ungetChar(c);
                            break;
                        }
                    }

                    this.lexsize -= 2;
                    this.lexbuf[this.lexsize] = (byte)'\0';
                    this.state = LEX_CONTENT;
                    continue;
                }

                /*
                   processing instructions
                */

                if (c == '?')
                {
                    this.lexsize -= 2;
                    this.state = LEX_PROCINSTR;
                    this.txtend = this.lexsize;

                    /* if some text before < return it now */
                    if (this.txtend > this.txtstart)
                    {
                        this.token = newNode(Node.TextNode,
                                              this.lexbuf,
                                              this.txtstart,
                                              this.txtend);
                        return this.token;
                    }

                    this.txtstart = this.lexsize;
                    continue;
                }

                /* Microsoft ASP's e.g. <% ... server-code ... %> */
                if (c == '%')
                {
                    this.lexsize -= 2;
                    this.state = LEX_ASP;
                    this.txtend = this.lexsize;

                    /* if some text before < return it now */
                    if (this.txtend > this.txtstart)
                    {
                        this.token = newNode(Node.TextNode,
                                              this.lexbuf,
                                              this.txtstart,
                                              this.txtend);
                        return this.token;
                    }

                    this.txtstart = this.lexsize;
                    continue;
                }

                /* Netscape's JSTE e.g. <# ... server-code ... #> */
                if (c == '#')
                {
                    this.lexsize -= 2;
                    this.state = LEX_JSTE;
                    this.txtend = this.lexsize;

                    /* if some text before < return it now */
                    if (this.txtend > this.txtstart)
                    {
                        this.token = newNode(Node.TextNode,
                                              this.lexbuf,
                                              this.txtstart,
                                              this.txtend);
                        return this.token;
                    }

                    this.txtstart = this.lexsize;
                    continue;
                }

                map = MAP((char)c);

                /* check for start tag */
                if ((map & LETTER) != 0)
                {
                    this.in.ungetChar(c);     /* push back letter */
                    this.lexsize -= 2;      /* discard "<" + letter */
                    this.txtend = this.lexsize;
                    this.state = LEX_STARTTAG;         /* ready to read tag name */

                    /* if some text before < return it now */
                    if (this.txtend > this.txtstart)
                    {
                        this.token = newNode(Node.TextNode,
                                              this.lexbuf,
                                              this.txtstart,
                                              this.txtend);
                        return this.token;
                    }

                    continue;       /* no text so keep going */
                }

                /* otherwise treat as CDATA */
                this.state = LEX_CONTENT;
                this.waswhite = false;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -