📄 lexer.java

📁 html 解析处理代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 3 4
                                state = 2;                            }                                                }                        else                            return (parseString (start, quotesmart));                        break;                    case 2: // prior to the first closing delimiter                        if ('-' == ch)                            state = 3;                        else if (Page.EOF == ch)                            return (parseString (start, quotesmart)); // no terminator                        break;                    case 3: // prior to the second closing delimiter                        if ('-' == ch)                            state = 4;                        else                            state = 2;                        break;                    case 4: // prior to the terminating >                        if ('>' == ch)                            done = true;                        else if (Character.isWhitespace (ch))                        {                            // stay in state 4                        }                        else                            if (!STRICT_REMARKS && (('-' == ch) || ('!' == ch)))                            {                                // stay in state 4                            }                            else                                // bug #1345049 HTMLParser should not terminate a comment with --->                                // should maybe issue a warning mentioning STRICT_REMARKS                                state = 2;                        break;                    default:                        throw new IllegalStateException ("how the fuck did we get in state " + state);                }        }        return (makeRemark (start, mCursor.getPosition ()));    }    /**     * Create a remark node based on the current cursor and the one provided.     * @param start The starting point of the node.     * @param end The ending point of the node.   
  * @exception ParserException If the nodefactory creation of the remark node fails.
     * @return The new Remark node, the node produced by <code>makeString</code>
     * when the span is a single character long, or <code>null</code> when the
     * span is empty (<code>start == end</code>).
     */
    protected Node makeRemark (int start, int end)
        throws
            ParserException
    {
        int length;
        Node ret;

        length = end - start;
        if (0 != length)
        {   // non-empty span
            if (2 > length)
                // this is an error: a one character span is handed to
                // makeString instead of becoming a remark node
                return (makeString (start, end));
            ret = getNodeFactory ().createRemarkNode (this.getPage (), start, end);
        }
        else
            // empty span: there is nothing to create
            ret = null;

        return (ret);
    }

    /**
     * Parse a java server page node.
     * Scan characters until "%&gt;" is encountered outside of a quoted string
     * or a comment. On that normal exit a tag is built by <code>makeTag</code>
     * from three attributes: the opening portion, the code and a one character
     * span at the closing percent position. If the scan stops for any other
     * reason (end of input, a bare "&gt;", or no "%" after the opening
     * character) the result of <code>parseString (start, true)</code> is
     * returned instead.
     * @param start The position at which to start scanning.
     * @return The tag node on a normal exit, otherwise whatever
     * <code>parseString</code> returns.
     * @exception ParserException If a problem occurs reading from the source.
     * @exception IllegalStateException If the state machine reaches an unknown
     * state, or terminates normally without having recorded a code position.
     */
    protected Node parseJsp (int start)
        throws
            ParserException
    {
        boolean done;
        char ch;
        int state;      // numeric states 0-4, or the quote character while inside a quoted string
        Vector attributes;
        int code;       // position where the code portion begins; 0 means "not seen yet"

        done = false;
        state = 0;
        code = 0;
        attributes = new Vector ();
        // State reached after each character ('d' marks done):
        // <%xyz%>
        // 012223d
        // <%=xyz%>
        // 0122223d
        // <%@xyz%>
        // 0122223d
        while (!done)
        {
            // NOTE(review): assumes getCharacter advances mCursor by one
            // character (ungetCharacter is used below to step back) - confirm against Page
            ch = mPage.getCharacter (mCursor);
            switch (state)
            {
                case 0: // prior to the percent
                    switch (ch)
                    {
                        case '%': // <%
                            state = 1;
                            break;
                        // case Page.EOF: // <\0
                        // case '>': // <>
                        default:
                            // anything other than '%' ends the scan; state stays 0 so the
                            // text is handed to parseString below
                            done = true;
                            break;
                    }
                    break;
                case 1: // prior to the optional qualifier
                    switch (ch)
                    {
                        case Page.EOF:   // <%\0
                        case '>': // <%>
                            done = true;
                            break;
                        case '=': // <%=
                        case '@': // <%@
                            // the qualifier is included in the first attribute:
                            // it spans start + 1 up to the current cursor position
                            code = mCursor.getPosition ();
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 2;
                            break;
                        default:  // <%x
                            // no qualifier: back the code start up by one so the
                            // character just read is excluded from the first attribute
                            code = mCursor.getPosition () - 1;
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 2;
                            break;
                    }
                    break;
                case 2: // prior to the closing percent
                    switch (ch)
                    {
                        case Page.EOF: // <%x\0
                        case '>': // <%x>
                            done = true;
                            break;
                        case '\'':
                        case '"': // <%???"
                            // remember which quote opened the string by using
                            // the quote character itself as the state
                            state = ch;
                            break;
                        case '%': // <%???%
                            state = 3;
                            break;
                        case '/': // // or /*
                            // look at the next character to see if a comment starts here
                            ch = mPage.getCharacter (mCursor);
                            if (ch == '/')
                            {   // line comment: find the \n or \r
                                while(true)
                                {
                                    ch = mPage.getCharacter (mCursor);
                                    if (ch == Page.EOF)
                                    {
                                        // input ended inside the comment
                                        done = true;
                                        break;
                                    }
                                    else if (ch == '\n' || ch == '\r')
                                    {
                                        break;
                                    }
                                }
                            }
                            else if (ch == '*')
                            {
                                // block comment: skip until a '*' immediately followed by '/',
                                // or until the end of input
                                do
                                {
                                    do
                                        ch = mPage.getCharacter (mCursor);
                                    while ((Page.EOF != ch) && ('*' != ch));
                                    ch = mPage.getCharacter (mCursor);
                                    if (ch == '*')
                                        // a second '*' may itself precede the '/', so
                                        // push it back for the inner loop to read again
                                        mPage.ungetCharacter (mCursor);
                                }
                                while ((Page.EOF != ch) && ('/' != ch));
                            }
                            else
                                // not a comment: push the look-ahead character back
                                mPage.ungetCharacter (mCursor);
                            break;
                        default:  // <%???x
                            break;
                    }
                    break;
                case 3: // after a percent, prior to the terminating >
                    switch (ch)
                    {
                        case Page.EOF: // <%x??%\0
                            done = true;
                            break;
                        case '>':
                            // state 4 is only ever set here and marks the normal exit
                            state = 4;
                            done = true;
                            break;
                        default:  // <%???%x
                            state = 2;
                            break;
                    }
                    break;
                case '"': // inside a double quoted string
                    switch (ch)
                    {
                        case Page.EOF: // <%x??"\0
                            done = true;
                            break;
                        case '"':
                            state = 2;
                            break;
                        default:  // <%???'??x
                            break;
                    }
                    break;
                case '\'': // inside a single quoted string
                    switch (ch)
                    {
                        case Page.EOF: // <%x??'\0
                            done = true;
                            break;
                        case '\'':
                            state = 2;
                            break;
                        default:  // <%???"??x
                            break;
                    }
                    break;
                default:
                    throw new IllegalStateException ("how the fuck did we get in state " + state);
            }
        }
        if (4 == state) // normal exit
        {
            if (0 != code)
            {
                // two characters back from the cursor; presumably the position
                // of the closing '%' - confirm against Page/Cursor semantics
                state = mCursor.getPosition () - 2; // reuse state
                // the code itself, from its recorded start up to that position
                attributes.addElement (new PageAttribute (mPage, code, state, -1, -1, (char)0));
                // a one character span at that position
                attributes.addElement (new PageAttribute (mPage, state, state + 1, -1, -1, (char)0));
            }
            else
                throw new IllegalStateException ("jsp with no code!");
        }
        else
            // the scan did not end with "%>": treat the text as a string node
            return (parseString (start, true)); // hmmm, true?

        return (makeTag (start, mCursor.getPosition (), attributes));
    }

    /**
     * Parse an XML processing instruction.
     * Scan characters until "?&gt;" is encountered outside of a quoted string.
     * On that normal exit a tag is built by <code>makeTag</code> from three
     * attributes: the opening portion, the content and a one character span at
     * the closing question mark position. If the scan stops for any other
     * reason (end of input, a bare "&gt;", or no "?" after the opening
     * character) the result of <code>parseString (start, true)</code> is
     * returned instead.
     * @param start The position at which to start scanning.
     * @return The tag node on a normal exit, otherwise whatever
     * <code>parseString</code> returns.
     * @exception ParserException If a problem occurs reading from the source.
     * @exception IllegalStateException If the state machine reaches an unknown
     * state, or terminates normally without having recorded a content position.
     */
    protected Node parsePI (int start)
        throws
            ParserException
    {
        boolean done;
        char ch;
        int state;      // numeric states 0-3, or the quote character while inside a quoted string
        Vector attributes;
        int code;       // position where the content begins; 0 means "not seen yet"

        done = false;
        state = 0;
        code = 0;
        attributes = new Vector ();
        // State reached after each character ('d' marks done):
        // <?xyz?>
        // 011112d
        while (!done)
        {
            // NOTE(review): assumes getCharacter advances mCursor by one
            // character - confirm against Page
            ch = mPage.getCharacter (mCursor);
            switch (state)
            {
                case 0: // prior to the question mark
                    switch (ch)
                    {
                        case '?': // <?
                            // the first attribute spans start + 1 up to the
                            // current cursor position
                            code = mCursor.getPosition ();
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 1;
                            break;
                        // case Page.EOF: // <\0
                        // case '>': // <>
                        default:
                            // anything other than '?' ends the scan; state stays 0 so the
                            // text is handed to parseString below
                            done = true;
                            break;
                    }
                    break;
                case 1: // prior to the closing question mark
                    switch (ch)
                    {
                        case Page.EOF: // <?x\0
                        case '>': // <?x>
                            done = true;
                            break;
                        case '\'':
                        case '"': // <?..."
                            // remember which quote opened the string by using
                            // the quote character itself as the state
                            state = ch;
                            break;
                        case '?': // <?...?
                            state = 2;
                            break;
                        default:  // <?...x
                            break;
                    }
                    break;
                case 2: // after a question mark, prior to the terminating >
                    switch (ch)
                    {
                        case Page.EOF: // <?x..?\0
                            done = true;
                            break;
                        case '>':
                            // state 3 is only ever set here and marks the normal exit
                            state = 3;
                            done = true;
                            break;
                        default:  // <?...?x
                            state = 1;
                            break;
                    }
                    break;
                case '"': // inside a double quoted string
                    switch (ch)
                    {
                        case Page.EOF: // <?x.."\0
                            done = true;
                            break;
                        case '"':
                            state = 1;
                            break;
                        default:  // <?...'.x
                            break;
                    }
                    break;
                case '\'': // inside a single quoted string
                    switch (ch)
                    {
                        case Page.EOF: // <?x..'\0
                            done = true;
                            break;
                        case '\'':
                            state = 1;
                            break;
                        default:  // <?..."..x
                            break;
                    }
                    break;
                default:
                    throw new IllegalStateException ("how the fuck did we get in state " + state);
            }
        }
        if (3 == state) // normal exit
        {
            if (0 != code)
            {
                // two characters back from the cursor; presumably the position
                // of the closing '?' - confirm against Page/Cursor semantics
                state = mCursor.getPosition () - 2; // reuse state
                // the content, from its recorded start up to that position
                attributes.addElement (new PageAttribute (mPage, code, state, -1, -1, (char)0));
                // a one character span at that position
                attributes.addElement (new PageAttribute (mPage, state, state + 1, -1, -1, (char)0));
            }
            else
                throw new IllegalStateException ("processing instruction with no content");
        }
        else
            // the scan did not end with "?>": treat the text as a string node
            return (parseString (start, true)); // hmmm, true?

        return (makeTag (start, mCursor.getPosition (), attributes));
    }

    //
    // Main program
    //

    /**
     * Mainline for command line operation.
     * With no arguments a version banner and a usage line are printed.
     * Otherwise a connection to <code>args[0]</code> is opened through the
     * connection manager obtained from <code>Page</code>, and every node
     * returned by the lexer is printed to standard output until
     * <code>nextNode</code> returns <code>null</code>. A
     * <code>ParserException</code> raised inside that loop is caught here:
     * its message, and the message of its nested throwable if there is one,
     * are printed to standard output.
     * @param args [0] The URL to parse.
     * @exception MalformedURLException If the provided URL cannot be resolved.
     * @exception ParserException If the parse fails.
     */
    public static void main (String[] args)
        throws
            MalformedURLException,
            ParserException
    {
        ConnectionManager manager;
        Lexer lexer;
        Node node;

        if (0 >= args.length)
        {
            // no URL given: show the version banner and usage
            System.out.println ("HTML Lexer v" + getVersion () + "\n");
            System.out.println ();
            System.out.println ("usage: java -jar htmllexer.jar <url>");
        }
        else
        {
            try
            {
                manager = Page.getConnectionManager ();
                lexer = new Lexer (manager.openConnection (args[0]));
                // dump each node until the lexer signals the end with null
                while (null != (node = lexer.nextNode (false)))
                    System.out.println (node.toString ());
            }
            catch (ParserException pe)
            {
                // report the failure, plus the underlying cause when one is attached
                System.out.println (pe.getMessage ());
                if (null != pe.getThrowable ())
                    System.out.println (pe.getThrowable ().getMessage ());
            }
        }
    }
}
上一页 1 2 3 4
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -