📄 lexer.java
字号:
state = 2; } } else return (parseString (start, quotesmart)); break; case 2: // prior to the first closing delimiter if ('-' == ch) state = 3; else if (Page.EOF == ch) return (parseString (start, quotesmart)); // no terminator break; case 3: // prior to the second closing delimiter if ('-' == ch) state = 4; else state = 2; break; case 4: // prior to the terminating > if ('>' == ch) done = true; else if (Character.isWhitespace (ch)) { // stay in state 4 } else if (!STRICT_REMARKS && (('-' == ch) || ('!' == ch))) { // stay in state 4 } else // bug #1345049 HTMLParser should not terminate a comment with ---> // should maybe issue a warning mentioning STRICT_REMARKS state = 2; break; default: throw new IllegalStateException ("how the fuck did we get in state " + state); } } return (makeRemark (start, mCursor.getPosition ())); } /** * Create a remark node based on the current cursor and the one provided. * @param start The starting point of the node. * @param end The ending point of the node. * @exception ParserException If the nodefactory creation of the remark node fails. * @return The new Remark node. */ protected Node makeRemark (int start, int end) throws ParserException { int length; Node ret; length = end - start; if (0 != length) { // return tag based on second character, '/', '%', Letter (ch), '!' if (2 > length) // this is an error return (makeString (start, end)); ret = getNodeFactory ().createRemarkNode (this.getPage (), start, end); } else ret = null; return (ret); } /** * Parse a java server page node. * Scan characters until "%>" is encountered, or the input stream is * exhausted, in which case <code>null</code> is returned. * @param start The position at which to start scanning. * @return The parsed node. * @exception ParserException If a problem occurs reading from the source. 
*/
    protected Node parseJsp (int start)
        throws
            ParserException
    {
        boolean done;
        char ch;
        int state;
        Vector attributes;
        int code;

        done = false;
        state = 0;
        code = 0; // stays 0 until state 1 has recorded where the code begins
        attributes = new Vector ();
        // State after each character for typical inputs ('d' = done):
        // <%xyz%>
        // 012223d
        // <%=xyz%>
        // 0122223d
        // <%@xyz%d
        // 0122223d
        // While inside a quoted string the state holds the quote character
        // itself ('"' or '\''), which is why those appear as case labels.
        while (!done)
        {
            ch = mPage.getCharacter (mCursor);
            switch (state)
            {
                case 0: // prior to the percent
                    switch (ch)
                    {
                        case '%': // <%
                            state = 1;
                            break;
                        // case Page.EOF: // <\0
                        // case '>': // <>
                        default:
                            done = true;
                            break;
                    }
                    break;
                case 1: // prior to the optional qualifier
                    switch (ch)
                    {
                        case Page.EOF: // <%\0
                        case '>': // <%>
                            done = true;
                            break;
                        case '=': // <%=
                        case '@': // <%@
                            // the qualifier character is included in the first attribute
                            code = mCursor.getPosition ();
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 2;
                            break;
                        default: // <%x
                            // no qualifier: the character just read already belongs to the code
                            code = mCursor.getPosition () - 1;
                            attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0));
                            state = 2;
                            break;
                    }
                    break;
                case 2: // prior to the closing percent
                    switch (ch)
                    {
                        case Page.EOF: // <%x\0
                        case '>': // <%x>
                            done = true;
                            break;
                        case '\'':
                        case '"': // <%???"
                            state = ch; // remember which quote has to close the string
                            break;
                        case '%': // <%???%
                            state = 3;
                            break;
                        case '/': // // or /*
                            ch = mPage.getCharacter (mCursor);
                            if (ch == '/')
                            {   // line comment: skip to the \n or \r
                                do
                                    ch = mPage.getCharacter (mCursor);
                                while ((Page.EOF != ch) && ('\n' != ch) && ('\r' != ch));
                                if (Page.EOF == ch)
                                    done = true;
                            }
                            else if (ch == '*')
                            {   // block comment: skip to the closing */
                                do
                                {
                                    do
                                        ch = mPage.getCharacter (mCursor);
                                    while ((Page.EOF != ch) && ('*' != ch));
                                    ch = mPage.getCharacter (mCursor);
                                    if (ch == '*')
                                        // a run of stars: let the inner loop see this one again
                                        mPage.ungetCharacter (mCursor);
                                }
                                while ((Page.EOF != ch) && ('/' != ch));
                            }
                            else if (Page.EOF == ch)
                                // End of input right after the '/': there is no
                                // character to push back, so finish exactly as
                                // the EOF case of this state does.
                                done = true;
                            else
                                mPage.ungetCharacter (mCursor);
                            break;
                        default: // <%???x
                            break;
                    }
                    break;
                case 3: // a percent has been seen, prior to the terminating >
                    switch (ch)
                    {
                        case Page.EOF: // <%x??%\0
                            done = true;
                            break;
                        case '>':
                            state = 4; // the only successful exit
                            done = true;
                            break;
                        default: // <%???%x
                            state = 2;
                            break;
                    }
                    break;
                case '"': // inside a double quoted string
                    switch (ch)
                    {
                        case Page.EOF: // <%x??"\0
                            done = true;
                            break;
                        case '"':
                            state = 2;
                            break;
                        default: // <%???'??x
                            break;
                    }
                    break;
                case '\'': // inside a single quoted string
                    switch (ch)
                    {
                        case Page.EOF: // <%x??'\0
                            done = true;
                            break;
                        case '\'':
                            state = 2;
                            break;
                        default: // <%???"??x
                            break;
                    }
                    break;
                default:
                    throw new IllegalStateException ("how the fuck did we get in state " + state);
            }
        }

        if (4 == state) // normal exit
        {
            if (0 != code)
            {
                // two back from the cursor; presumably the closing '%' - TODO confirm
                int end = mCursor.getPosition () - 2;
                attributes.addElement (new PageAttribute (mPage, code, end, -1, -1, (char)0));
                attributes.addElement (new PageAttribute (mPage, end, end + 1, -1, -1, (char)0));
            }
            else
                throw new IllegalStateException ("jsp with no code!");
        }
        else
            return (parseString (start, true)); // hmmm, true?

        return (makeTag (start, mCursor.getPosition (), attributes));
    }

    /**
     * Parse an XML processing instruction.
     * Scan characters until "?>" is encountered. If the scan ends any
     * other way (for instance because the input stream is exhausted), the
     * result of <code>parseString (start, true)</code> is returned instead.
     * @param start The position at which to start scanning.
     * @return The parsed node.
     * @exception ParserException If a problem occurs reading from the source.
*/ protected Node parsePI (int start) throws ParserException { boolean done; char ch; int state; Vector attributes; int code; done = false; state = 0; code = 0; attributes = new Vector (); // <?xyz?> // 011112d while (!done) { ch = mPage.getCharacter (mCursor); switch (state) { case 0: // prior to the question mark switch (ch) { case '?': // <? code = mCursor.getPosition (); attributes.addElement (new PageAttribute (mPage, start + 1, code, -1, -1, (char)0)); state = 1; break; // case Page.EOF: // <\0 // case '>': // <> default: done = true; break; } break; case 1: // prior to the closing question mark switch (ch) { case Page.EOF: // <?x\0 case '>': // <?x> done = true; break; case '\'': case '"':// <?..." state = ch; break; case '?': // <?...? state = 2; break; default: // <?...x break; } break; case 2: switch (ch) { case Page.EOF: // <?x..?\0 done = true; break; case '>': state = 3; done = true; break; default: // <?...?x state = 1; break; } break; case '"': switch (ch) { case Page.EOF: // <?x.."\0 done = true; break; case '"': state = 1; break; default: // <?...'.x break; } break; case '\'': switch (ch) { case Page.EOF: // <?x..'\0 done = true; break; case '\'': state = 1; break; default: // <?..."..x break; } break; default: throw new IllegalStateException ("how the fuck did we get in state " + state); } } if (3 == state) // normal exit { if (0 != code) { state = mCursor.getPosition () - 2; // reuse state attributes.addElement (new PageAttribute (mPage, code, state, -1, -1, (char)0)); attributes.addElement (new PageAttribute (mPage, state, state + 1, -1, -1, (char)0)); } else throw new IllegalStateException ("processing instruction with no content"); } else return (parseString (start, true)); // hmmm, true? return (makeTag (start, mCursor.getPosition (), attributes)); } // // Main program // /** * Mainline for command line operation * @param args [0] The URL to parse. * @exception MalformedURLException If the provided URL cannot be resolved. 
* @exception ParserException If the parse fails.
     */
    public static void main (String[] args)
        throws
            MalformedURLException,
            ParserException
    {
        if (args.length <= 0)
        {   // nothing to parse: show the banner and usage, then stop
            System.out.println ("HTML Lexer v" + getVersion () + "\n");
            System.out.println ();
            System.out.println ("usage: java -jar htmllexer.jar <url>");
            return;
        }

        try
        {
            ConnectionManager manager = Page.getConnectionManager ();
            Lexer lexer = new Lexer (manager.openConnection (args[0]));
            // nextNode yields null once there are no more nodes
            for (Node node = lexer.nextNode (false); null != node; node = lexer.nextNode (false))
                System.out.println (node.toString ());
        }
        catch (ParserException pe)
        {
            // report the failure and, when one is attached, its underlying cause
            System.out.println (pe.getMessage ());
            if (null != pe.getThrowable ())
                System.out.println (pe.getThrowable ().getMessage ());
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -