📄 lexer.java
字号:
ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if (0x1b == ch) // escape { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('$' == ch) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('B' == ch) scanJIS (mCursor); else { mCursor.retreat (); mCursor.retreat (); } } else mCursor.retreat (); } else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch))) quote = ch; // enter quoted state // patch from Gernot Fricke to handle escaped closing quote else if (quotesmart && (0 != quote) && ('\\' == ch)) { ch = mPage.getCharacter (mCursor); // try to consume escape if ((Page.EOF != ch) && ('\\' != ch) // escaped backslash && (ch != quote)) // escaped quote character // ( reflects ["] or ['] whichever opened the quotation) mCursor.retreat(); // unconsume char if char not an escape } else if (quotesmart && (ch == quote)) quote = 0; // exit quoted state else if (quotesmart && (0 == quote) && (ch == '/')) { // handle multiline and double slash comments (with a quote) // in script like: // I can't handle single quotations. ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; else if ('/' == ch) { do ch = mPage.getCharacter (mCursor); while ((Page.EOF != ch) && ('\n' != ch)); } else if ('*' == ch) { do { do ch = mPage.getCharacter (mCursor); while ((Page.EOF != ch) && ('*' != ch)); ch = mPage.getCharacter (mCursor); if (ch == '*') mCursor.retreat (); } while ((Page.EOF != ch) && ('/' != ch)); } else mCursor.retreat (); } else if ((0 == quote) && ('<' == ch)) { ch = mPage.getCharacter (mCursor); if (Page.EOF == ch) done = true; // the order of these tests might be optimized for speed: else if ('/' == ch || Character.isLetter (ch) || '!' == ch || '%' == ch || '?' 
== ch) { done = true; mCursor.retreat (); mCursor.retreat (); } else { // it's not a tag, so keep going, but check for quotes mCursor.retreat (); } } } return (makeString (start, mCursor.getPosition ())); } /** * Create a string node based on the current cursor and the one provided. * @param start The starting point of the node. * @param end The ending point of the node. * @exception ParserException If the nodefactory creation of the text * node fails. * @return The new Text node. */ protected Node makeString (int start, int end) throws ParserException { int length; Node ret; length = end - start; if (0 != length) // got some characters ret = getNodeFactory ().createStringNode ( this.getPage (), start, end); else ret = null; return (ret); } /** * Generate a whitespace 'attribute', * @param attributes The list so far. * @param bookmarks The array of positions. */ private void whitespace (Vector attributes, int[] bookmarks) { if (bookmarks[1] > bookmarks[0]) attributes.addElement (new PageAttribute ( mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); } /** * Generate a standalone attribute -- font. * @param attributes The list so far. * @param bookmarks The array of positions. */ private void standalone (Vector attributes, int[] bookmarks) { attributes.addElement (new PageAttribute ( mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); } /** * Generate an empty attribute -- color=. * @param attributes The list so far. * @param bookmarks The array of positions. */ private void empty (Vector attributes, int[] bookmarks) { attributes.addElement (new PageAttribute ( mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); } /** * Generate an unquoted attribute -- size=1. * @param attributes The list so far. * @param bookmarks The array of positions. 
*/ private void naked (Vector attributes, int[] bookmarks) { attributes.addElement (new PageAttribute ( mPage, bookmarks[1], bookmarks[2], bookmarks[3], bookmarks[4], (char)0)); } /** * Generate an single quoted attribute -- width='100%'. * @param attributes The list so far. * @param bookmarks The array of positions. */ private void single_quote (Vector attributes, int[] bookmarks) { attributes.addElement (new PageAttribute ( mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, bookmarks[5], '\'')); } /** * Generate an double quoted attribute -- CONTENT="Test Development". * @param attributes The list so far. * @param bookmarks The array of positions. */ private void double_quote (Vector attributes, int[] bookmarks) { attributes.addElement (new PageAttribute ( mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, bookmarks[6], '"')); } /** * Parse a tag. * Parse the name and attributes from a start tag.<p> * From the <a href="http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2"> * HTML 4.01 Specification, W3C Recommendation 24 December 1999</a> * http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2<p> * <cite> * 3.2.2 Attributes<p> * Elements may have associated properties, called attributes, which may * have values (by default, or set by authors or scripts). Attribute/value * pairs appear before the final ">" of an element's start tag. Any number * of (legal) attribute value pairs, separated by spaces, may appear in an * element's start tag. They may appear in any order.<p> * In this example, the id attribute is set for an H1 element: * <code> * <H1 id="section1"> * </code> * This is an identified heading thanks to the id attribute * <code> * </H1> * </code> * By default, SGML requires that all attribute values be delimited using * either double quotation marks (ASCII decimal 34) or single quotation * marks (ASCII decimal 39). Single quote marks can be included within the * attribute value when the value is delimited by double quote marks, and * vice versa. 
Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). * For double quotes authors can also use the character entity reference * &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters * (a-z and A-Z), digits (0-9), hyphens (ASCII decimal 45), * periods (ASCII decimal 46), underscores (ASCII decimal 95), * and colons (ASCII decimal 58). We recommend using quotation marks even * when it is possible to eliminate them.<p> * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each * attribute in the reference manual indicates whether its value is * case-insensitive.<p> * All the attributes defined by this specification are listed in the * attribute index.<p> * </cite> * <p> * This method uses a state machine with the following states: * <ol> * <li>state 0 - outside of any attribute</li> * <li>state 1 - within attribute name</li> * <li>state 2 - equals hit</li> * <li>state 3 - within naked attribute value.</li> * <li>state 4 - within single quoted attribute value</li> * <li>state 5 - within double quoted attribute value</li> * <li>state 6 - whitespaces after attribute name could lead to state 2 (=) or state 0</li> * </ol> * <p> * The starting point for the various components is stored in an array * of integers that match the initiation point for the states one-for-one, * i.e. bookmarks[0] is where state 0 began, bookmarks[1] is where state 1 * began, etc. * Attributes are stored in a <code>Vector</code> having * one slot for each whitespace or attribute/value pair. * The first slot is for attribute name (kind of like a standalone attribute). * @param start The position at which to start scanning. * @return The parsed tag. * @exception ParserException If a problem occurs reading from the source. 
*/ protected Node parseTag (int start) throws ParserException { boolean done; char ch; int state; int[] bookmarks; Vector attributes; done = false; attributes = new Vector (); state = 0; bookmarks = new int[8]; bookmarks[0] = mCursor.getPosition (); while (!done) { bookmarks[state + 1] = mCursor.getPosition (); ch = mPage.getCharacter (mCursor); switch (state) { case 0: // outside of any attribute if ((Page.EOF == ch) || ('>' == ch) || ('<' == ch)) { if ('<' == ch) { // don't consume the opening angle mCursor.retreat (); bookmarks[state + 1] = mCursor.getPosition (); } whitespace (attributes, bookmarks); done = true; } else if (!Character.isWhitespace (ch)) { whitespace (attributes, bookmarks); state = 1; } break; case 1: // within attribute name if ((Page.EOF == ch) || ('>' == ch) || ('<' == ch)) { if ('<' == ch) { // don't consume the opening angle mCursor.retreat (); bookmarks[state + 1] = mCursor.getPosition (); } standalone (attributes, bookmarks); done = true; } else if (Character.isWhitespace (ch)) { // whitespaces might be followed by next attribute or an equal sign // see Bug #891058 Bug in lexer. bookmarks[6] = bookmarks[2]; // setting the bookmark[0] is done in state 6 if applicable state = 6; } else if ('=' == ch) state = 2; break; case 2: // equals hit if ((Page.EOF == ch) || ('>' == ch)) { empty (attributes, bookmarks); done = true; } else if ('\'' == ch) { state = 4; bookmarks[4] = bookmarks[3]; } else if ('"' == ch) { state = 5; bookmarks[5] = bookmarks[3]; } else if (Character.isWhitespace (ch)) { // collect white spaces after "=" into the assignment string; // do nothing // see Bug #891058 Bug in lexer. 
} else state = 3; break; case 3: // within naked attribute value if ((Page.EOF == ch) || ('>' == ch)) { naked (attributes, bookmarks); done = true; } else if (Character.isWhitespace (ch)) { naked (attributes, bookmarks); bookmarks[0] = bookmarks[4]; state = 0; } break; case 4: // within single quoted attribute value if (Page.EOF == ch) { single_quote (attributes, bookmarks); done = true; // complain? } else if ('\'' == ch) { single_quote (attributes, bookmarks); bookmarks[0] = bookmarks[5] + 1; state = 0; } break; case 5: // within double quoted attribute value if (Page.EOF == ch) { double_quote (attributes, bookmarks); done = true; // complain? } else if ('"' == ch) { double_quote (attributes, bookmarks); bookmarks[0] = bookmarks[6] + 1; state = 0; } break; // patch for lexer state correction by // Gernot Fricke // See Bug # 891058 Bug in lexer. case 6: // undecided for state 0 or 2 // we have read white spaces after an attributte name if (Page.EOF == ch) { // same as last else clause standalone (attributes, bookmarks); bookmarks[0]=bookmarks[6]; mCursor.retreat(); state=0; } else if (Character.isWhitespace (ch)) { // proceed } else if ('=' == ch) // yepp. the white spaces belonged to the equal. { bookmarks[2] = bookmarks[6]; bookmarks[3] = bookmarks[7];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -