📄 lexer.java
字号:
throws ParserException { return (parseCDATA (false)); } /** * Return CDATA as a text node. * Slightly less rigid than {@link #parseCDATA()} this method provides for * parsing CDATA that may contain quoted strings that have embedded * ETAGO ("</") delimiters and skips single and multiline comments. * @param quotesmart If <code>true</code> the strict definition of CDATA is * extended to allow for single or double quoted ETAGO ("</") sequences. * @return The <code>TextNode</code> of the CDATA or <code>null</code> if none. * @see #parseCDATA() * @exception ParserException If a problem occurs reading from the source. */ public Node parseCDATA (boolean quotesmart) throws ParserException { int start; int state; boolean done; char quote; char ch; int end; boolean comment; start = mCursor.getPosition (); state = 0; done = false; quote = 0; comment = false; while (!done) { ch = mPage.getCharacter (mCursor); switch (state) { case 0: // prior to ETAGO switch (ch) { case Page.EOF: done = true; break; case '\'': if (quotesmart && !comment) if (0 == quote) quote = '\''; // enter quoted state else if ('\'' == quote) quote = 0; // exit quoted state break; case '"': if (quotesmart && !comment) if (0 == quote) quote = '"'; // enter quoted state else if ('"' == quote) quote = 0; // exit quoted state break; case '\\': if (quotesmart) if (0 != quote) { ch = mPage.getCharacter (mCursor); // try to consume escaped character if (Page.EOF == ch) done = true; else if ( (ch != '\\') && (ch != quote)) // unconsume char if character was not an escapable char. mPage.ungetCharacter (mCursor); } break; case '/': if (quotesmart) if (0 == quote) { // handle multiline and double slash comments (with a quote) ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('/' == ch) comment = true; else if ('*' == ch) { do { do ch = mPage.getCharacter (mCursor); while ((Page.EOF != ch) && ('*' != ch)); ch = mPage.getCharacter (mCursor); if (ch == '*') mPage.ungetCharacter (mCursor); } while ((Page.EOF != ch) && ('/' != ch)); } else mPage.ungetCharacter (mCursor); } break; case '\n': comment = false; break; case '<': if (quotesmart) { if (0 == quote) state = 1; } else state = 1; break; default: break; } break; case 1: // < switch (ch) { case Page.EOF: done = true; break; case '/': state = 2; break; case '!': ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('-' == ch) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('-' == ch) state = 3; else state = 0; } else state = 0; break; default: state = 0; break; } break; case 2: // </ comment = false; if (Page.EOF == ch) done = true; else if (Character.isLetter (ch)) { done = true; // back up to the start of ETAGO mPage.ungetCharacter (mCursor); mPage.ungetCharacter (mCursor); mPage.ungetCharacter (mCursor); } else state = 0; break; case 3: // <! comment = false; if (Page.EOF == ch) done = true; else if ('-' == ch) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('-' == ch) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('>' == ch) state = 0; else { mPage.ungetCharacter (mCursor); mPage.ungetCharacter (mCursor); } } else mPage.ungetCharacter (mCursor); } break; default: throw new IllegalStateException ("how the fuck did we get in state " + state); } } end = mCursor.getPosition (); return (makeString (start, end)); } // // NodeFactory interface // /** * Create a new string node. * @param page The page the node is on. * @param start The beginning position of the string. * @param end The ending positiong of the string. * @return The created Text node. */ public Text createStringNode (Page page, int start, int end) { return (new TextNode (page, start, end)); } /** * Create a new remark node. * @param page The page the node is on. * @param start The beginning position of the remark. * @param end The ending positiong of the remark. * @return The created Remark node. */ public Remark createRemarkNode (Page page, int start, int end) { return (new RemarkNode (page, start, end)); } /** * Create a new tag node. * Note that the attributes vector contains at least one element, * which is the tag name (standalone attribute) at position zero. * This can be used to decide which type of node to create, or * gate other processing that may be appropriate. * @param page The page the node is on. * @param start The beginning position of the tag. * @param end The ending positiong of the tag. * @param attributes The attributes contained in this tag. * @return The created Tag node. */ public Tag createTagNode (Page page, int start, int end, Vector attributes) { return (new TagNode (page, start, end, attributes)); } // // Internal methods // /** * Advance the cursor through a JIS escape sequence. * @param cursor A cursor positioned within the escape sequence. * @exception ParserException If a problem occurs reading from the source. */ protected void scanJIS (Cursor cursor) throws ParserException { boolean done; char ch; int state; done = false; state = 0; while (!done) { ch = mPage.getCharacter (cursor); if (Page.EOF == ch) done = true; else switch (state) { case 0: if (0x1b == ch) // escape state = 1; break; case 1: if ('(' == ch) state = 2; else state = 0; break; case 2: if ('J' == ch) done = true; else state = 0; break; default: throw new IllegalStateException ("state " + state); } } } /** * Parse a string node. * Scan characters until "</", "<%", "<!" or < followed by a * letter is encountered, or the input stream is exhausted, in which * case <code>null</code> is returned. * @param start The position at which to start scanning. * @param quotesmart If <code>true</code>, strings ignore quoted contents. * @return The parsed node. * @exception ParserException If a problem occurs reading from the source. */ protected Node parseString (int start, boolean quotesmart) throws ParserException { boolean done; char ch; char quote; done = false; quote = 0; while (!done) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if (0x1b == ch) // escape { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('$' == ch) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('B' == ch) scanJIS (mCursor); else { mPage.ungetCharacter (mCursor); mPage.ungetCharacter (mCursor); } } else mPage.ungetCharacter (mCursor); } else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch))) quote = ch; // enter quoted state // patch from Gernot Fricke to handle escaped closing quote else if (quotesmart && (0 != quote) && ('\\' == ch)) { ch = mPage.getCharacter (mCursor); // try to consume escape if ((Page.EOF != ch) && ('\\' != ch) // escaped backslash && (ch != quote)) // escaped quote character // ( reflects ["] or ['] whichever opened the quotation) mPage.ungetCharacter (mCursor); // unconsume char if char not an escape } else if (quotesmart && (ch == quote)) quote = 0; // exit quoted state else if (quotesmart && (0 == quote) && (ch == '/')) { // handle multiline and double slash comments (with a quote) // in script like: // I can't handle single quotations. ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('/' == ch) { do ch = mPage.getCharacter (mCursor); while ((Page.EOF != ch) && ('\n' != ch)); } else if ('*' == ch) { do { do ch = mPage.getCharacter (mCursor); while ((Page.EOF != ch) && ('*' != ch)); ch = mPage.getCharacter (mCursor); if (ch == '*') mPage.ungetCharacter (mCursor); } while ((Page.EOF != ch) && ('/' != ch)); } else mPage.ungetCharacter (mCursor); } else if ((0 == quote) && ('<' == ch)) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; // the order of these tests might be optimized for speed: else if ('/' == ch || Character.isLetter (ch) || '!' == ch || '%' == ch || '?' == ch) { done = true; mPage.ungetCharacter (mCursor); mPage.ungetCharacter (mCursor); } else { // it's not a tag, so keep going, but check for quotes mPage.ungetCharacter (mCursor); } } } return (makeString (start, mCursor.getPosition ())); } /** * Create a string node based on the current cursor and the one provided. * @param start The starting point of the node. * @param end The ending point of the node. * @exception ParserException If the nodefactory creation of the text * node fails. * @return The new Text node. */ protected Node makeString (int start, int end) throws ParserException { int length; Node ret; length = end - start; if (0 != length)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -