📄 lexer.java

📁 本程序用于对页面信息进行提取并分析
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
                        state=2;                    }                    else                    {                        // white spaces were not ended by equal                        // meaning the attribute was a stand alone attribute                        // now: create the stand alone attribute and rewind                         // the cursor to the end of the white spaces                        // and restart scanning as whitespace attribute.                  	    standalone (attributes, bookmarks);                  	    bookmarks[0]=bookmarks[6];                  	    mCursor.retreat();                  	    state=0;                   	}                    break;                default:                    throw new IllegalStateException ("how the fuck did we get in state " + state);            }        }        return (makeTag (start, mCursor.getPosition (), attributes));    }    /**     * Create a tag node based on the current cursor and the one provided.     * @param start The starting point of the node.     * @param end The ending point of the node.     * @param attributes The attributes parsed from the tag.     * @exception ParserException If the nodefactory creation of the tag node fails.     * @return The new Tag node.     */    protected Node makeTag (int start, int end, Vector attributes)        throws            ParserException    {        int length;        Node ret;        length = end - start;        if (0 != length)        {   // return tag based on second character, '/', '%', Letter (ch), '!'            if (2 > length)                // this is an error                return (makeString (start, end));            ret = getNodeFactory ().createTagNode (this.getPage (), start, end, attributes);        }        else            ret = null;        return (ret);    }    /**     * Parse a comment.     
* Parse a remark markup.<p>     * From the <a href="http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.4">     * HTML 4.01 Specification, W3C Recommendation 24 December 1999</a>     * http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.4<p>     * <cite>     * 3.2.4 Comments<p>     * HTML comments have the following syntax:<p>     * <code>     * &lt;!-- this is a comment --&gt;<p>     * &lt;!-- and so is this one,<p>     *     which occupies more than one line --&gt;<p>     * </code>     * White space is not permitted between the markup declaration     * open delimiter("&lt;!") and the comment open delimiter ("--"),     * but is permitted between the comment close delimiter ("--") and     * the markup declaration close delimiter ("&gt;").     * A common error is to include a string of hyphens ("---") within a comment.     * Authors should avoid putting two or more adjacent hyphens inside comments.     * Information that appears between comments has no special meaning     * (e.g., character references are not interpreted as such).     * Note that comments are markup.<p>     * </cite>     * <p>     * This method uses a state machine with the following states:     * <ol>     * <li>state 0 - prior to the first open delimiter</li>     * <li>state 1 - prior to the second open delimiter</li>     * <li>state 2 - prior to the first closing delimiter</li>     * <li>state 3 - prior to the second closing delimiter</li>     * <li>state 4 - prior to the terminating &gt;</li>     * </ol>     * <p>     * All comment text (everything excluding the &lt; and &gt;), is included     * in the remark text.     * We allow terminators like --!&gt; even though this isn't part of the spec.     * @param start The position at which to start scanning.     * @param quotesmart If <code>true</code>, strings ignore quoted contents.     * @return The parsed node.     * @exception ParserException If a problem occurs reading from the source.     
*/    protected Node parseRemark (int start, boolean quotesmart)        throws            ParserException    {        boolean done;        char ch;        int state;        done = false;        state = 0;        while (!done)        {            ch = mPage.getCharacter (mCursor);            if (Page.EOF == ch)                done = true;            else                switch (state)                {                    case 0: // prior to the first open delimiter                        if ('>' == ch)                            done = true;                        if ('-' == ch)                            state = 1;                        else                            return (parseString (start, quotesmart));                        break;                    case 1: // prior to the second open delimiter                        if ('-' == ch)                        {                            // handle <!--> because netscape does                            ch = mPage.getCharacter (mCursor);                            if (Page.EOF == ch)                                done = true;                            else if ('>' == ch)                                done = true;                            else                            {                                mCursor.retreat ();                                state = 2;                            }                                                }                        else                            return (parseString (start, quotesmart));                        break;                    case 2: // prior to the first closing delimiter                        if ('-' == ch)                            state = 3;                        else if (Page.EOF == ch)                            return (parseString (start, quotesmart)); // no terminator                        break;                    case 3: // prior to the second closing delimiter                        if ('-' == ch)                            state = 4;      
                  else                            state = 2;                        break;                    case 4: // prior to the terminating >                        if ('>' == ch)                            done = true;                        else if (('!' == ch) || ('-' == ch) || Character.isWhitespace (ch))                        {                            // stay in state 4                        }                        else                            state = 2;                        break;                    default:                        throw new IllegalStateException ("how the fuck did we get in state " + state);                }        }        return (makeRemark (start, mCursor.getPosition ()));    }    /**     * Create a remark node based on the current cursor and the one provided.     * @param start The starting point of the node.     * @param end The ending point of the node.     * @exception ParserException If the nodefactory creation of the remark node fails.     * @return The new Remark node.     */    protected Node makeRemark (int start, int end)        throws            ParserException    {        int length;        Node ret;        length = end - start;        if (0 != length)        {   // return tag based on second character, '/', '%', Letter (ch), '!'            if (2 > length)                // this is an error                return (makeString (start, end));            ret = getNodeFactory ().createRemarkNode (this.getPage (), start, end);        }        else            ret = null;                return (ret);    }    /**     * Parse a java server page node.     * Scan characters until "%&gt;" is encountered, or the input stream is     * exhausted, in which case <code>null</code> is returned.     * @param start The position at which to start scanning.     * @return The parsed node.     * @exception ParserException If a problem occurs reading from the source.     
*/
    protected Node parseJsp (int start)
        throws
            ParserException
    {
        boolean done;
        char ch;
        int state;
        Vector attributes;
        int code;
        int percent;

        done = false;
        state = 0;
        code = 0;
        attributes = new Vector ();
        // State reached after each character of typical input (d = done):
        // <%xyz%>
        // 012223d
        // <%=xyz%>
        // 0122223d
        // <%@xyz%>
        // 0122223d
        // While inside a quoted string the state holds the quote character itself.
        while (!done)
        {
            ch = mPage.getCharacter (mCursor);
            switch (state)
            {
                case 0: // prior to the percent
                    switch (ch)
                    {
                        case '%': // <%
                            state = 1;
                            break;
                        // case Page.EOF: // <\0
                        // case '>': // <>
                        default:
                            done = true;
                            break;
                    }
                    break;
                case 1: // prior to the optional qualifier
                    switch (ch)
                    {
                        case Page.EOF:   // <%\0
                        case '>': // <%>
                            done = true;
                            break;
                        case '=': // <%=
                        case '@': // <%@
                            // the qualifier is part of the first attribute, code starts after it
                            code = mCursor.getPosition ();
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 2;
                            break;
                        default:  // <%x
                            // no qualifier: the character just read is already code
                            code = mCursor.getPosition () - 1;
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 2;
                            break;
                    }
                    break;
                case 2: // prior to the closing percent
                    switch (ch)
                    {
                        case Page.EOF: // <%x\0
                        case '>': // <%x>
                            done = true;
                            break;
                        case '\'':
                        case '"':// <%???"
                            // remember which quote has to close the string
                            state = ch;
                            break;
                        case '%': // <%???%
                            state = 3;
                            break;
                        case '/': // // or /*
                            ch = mPage.getCharacter (mCursor);
                            if (ch == '/')
                            {   // find the \n or \r
                                while (true)
                                {
                                    ch = mPage.getCharacter (mCursor);
                                    if (ch == Page.EOF)
                                    {
                                        done = true;
                                        break;
                                    }
                                    else if (ch == '\n' || ch == '\r')
                                    {
                                        break;
                                    }
                                }
                            }
                            else if (ch == '*')
                            {
                                // skip to the closing */ (or the end of input)
                                do
                                {
                                    do
                                        ch = mPage.getCharacter (mCursor);
                                    while ((Page.EOF != ch) && ('*' != ch));
                                    ch = mPage.getCharacter (mCursor);
                                    if (ch == '*')
                                        // could be the star of the closing */, look at it again
                                        mCursor.retreat ();
                                }
                                while ((Page.EOF != ch) && ('/' != ch));
                            }
                            else
                            {
                                // a lone slash: re-examine the lookahead character
                                mCursor.retreat ();
                            }
                            break;
                        default:  // <%???x
                            break;
                    }
                    break;
                case 3: // after a percent that may start the closing %>
                    switch (ch)
                    {
                        case Page.EOF: // <%x??%\0
                            done = true;
                            break;
                        case '>':
                            state = 4;
                            done = true;
                            break;
                        default:  // <%???%x
                            // The percent was ordinary code. Let state 2 look at
                            // this character again so that a second percent (%%>),
                            // a quote or a comment right after it is not swallowed.
                            mCursor.retreat ();
                            state = 2;
                            break;
                    }
                    break;
                case '"':
                    switch (ch)
                    {
                        case Page.EOF: // <%x??"\0
                            done = true;
                            break;
                        case '"':
                            state = 2;
                            break;
                        default:  // <%???"??x
                            break;
                    }
                    break;
                case '\'':
                    switch (ch)
                    {
                        case Page.EOF: // <%x??'\0
                            done = true;
                            break;
                        case '\'':
                            state = 2;
                            break;
                        default:  // <%???'??x
                            break;
                    }
                    break;
                default:
                    throw new IllegalStateException ("how the fuck did we get in state " + state);
            }
        }

        if (4 != state)
            // no closing %> was seen: treat the text as a string
            return (parseString (start, true)); // hmmm, true?
        if (0 == code)
            throw new IllegalStateException ("jsp with no code!");

        // two positions back from the cursor, presumably the closing percent
        percent = mCursor.getPosition () - 2;
        attributes.addElement (new PageAttribute (mPage, code, percent, -1, -1, (char)0));
        attributes.addElement (new PageAttribute (mPage, percent, percent + 1, -1, -1, (char)0));

        return (makeTag (start, mCursor.getPosition (), attributes));
    }

    /**     * Parse an XML processing instruction.     * Scan characters until "?&gt;" is encountered, or the input stream is     * exhausted, in which case <code>null</code> is returned.     * @param start The position at which to start scanning.     * @return The parsed node.     * @exception ParserException If a problem occurs reading from the source.     */    protected Node parsePI (int start)        throws            ParserException    {        boolean done;        char ch;        int state;        Vector attributes;        int code;        done = false;        state = 0;        code = 0;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -