📄 lexer.java
字号:
int c = 0;
int lastc;
int badcomment = 0;
MutableBoolean isempty = new MutableBoolean();
AttVal attributes;
if (this.pushed)
{
/* duplicate inlines in preference to pushed text nodes when appropriate */
if (this.token.type != Node.TextNode ||
(this.insert == -1 && this.inode == null))
{
this.pushed = false;
return this.token;
}
}
/* at start of block elements, unclosed inline
elements are inserted into the token stream */
if (this.insert != -1 || this.inode != null)
return insertedToken();
this.lines = this.in.curline;
this.columns = this.in.curcol;
this.waswhite = false;
this.txtstart = this.lexsize;
this.txtend = this.lexsize;
while (true)
{
c = this.in.readChar();
if (c == StreamIn.EndOfStream) break;
if (this.insertspace && mode != IgnoreWhitespace)
{
addCharToLexer(' ');
this.waswhite = true;
this.insertspace = false;
}
/* treat \r\n as \n and \r as \n */
if (c == '\r')
{
c = this.in.readChar();
if (c != '\n')
this.in.ungetChar(c);
c = '\n';
}
addCharToLexer(c);
switch (this.state)
{
case LEX_CONTENT: /* element content */
map = MAP((char)c);
/*
Discard white space if appropriate. It's cheaper
to do this here rather than in parser methods
for elements that don't have mixed content.
*/
if (((map & WHITE) != 0) && (mode == IgnoreWhitespace)
&& this.lexsize == this.txtstart + 1)
{
--this.lexsize;
this.waswhite = false;
this.lines = this.in.curline;
this.columns = this.in.curcol;
continue;
}
if (c == '<')
{
this.state = LEX_GT;
continue;
}
if ((map & WHITE) != 0)
{
/* was previous char white? */
if (this.waswhite)
{
if (mode != Preformatted && mode != IgnoreMarkup)
{
--this.lexsize;
this.lines = this.in.curline;
this.columns = this.in.curcol;
}
}
else /* prev char wasn't white */
{
this.waswhite = true;
lastc = c;
if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
changeChar((byte)' ');
}
continue;
}
else if (c == '&' && mode != IgnoreMarkup)
parseEntity(mode);
/* this is needed to avoid trimming trailing whitespace */
if (mode == IgnoreWhitespace)
mode = MixedContent;
this.waswhite = false;
continue;
case LEX_GT: /* < */
/* check for endtag */
if (c == '/')
{
c = this.in.readChar();
if (c == StreamIn.EndOfStream)
{
this.in.ungetChar(c);
continue;
}
addCharToLexer(c);
map = MAP((char)c);
if ((map & LETTER) != 0)
{
this.lexsize -= 3;
this.txtend = this.lexsize;
this.in.ungetChar(c);
this.state = LEX_ENDTAG;
this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */
this.in.curcol -= 2;
/* if some text before the </ return it now */
if (this.txtend > this.txtstart)
{
/* trim space char before end tag */
if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ')
{
this.lexsize -= 1;
this.txtend = this.lexsize;
}
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
continue; /* no text so keep going */
}
/* otherwise treat as CDATA */
this.waswhite = false;
this.state = LEX_CONTENT;
continue;
}
if (mode == IgnoreMarkup)
{
/* markup is ignored in this mode, so keep the '<' as text (CDATA) */
this.waswhite = false;
this.state = LEX_CONTENT;
continue;
}
/*
look out for comments, doctype or marked sections
this isn't quite right, but it's getting there ...
*/
if (c == '!')
{
c = this.in.readChar();
if (c == '-')
{
c = this.in.readChar();
if (c == '-')
{
this.state = LEX_COMMENT; /* comment */
this.lexsize -= 2;
this.txtend = this.lexsize;
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
this.txtstart = this.lexsize;
continue;
}
Report.warning(this, null, null, Report.MALFORMED_COMMENT);
}
else if (c == 'd' || c == 'D')
{
this.state = LEX_DOCTYPE; /* doctype */
this.lexsize -= 2;
this.txtend = this.lexsize;
mode = IgnoreWhitespace;
/* skip until white space or '>' */
for (;;)
{
c = this.in.readChar();
if (c == StreamIn.EndOfStream || c == '>')
{
this.in.ungetChar(c);
break;
}
map = MAP((char)c);
if ((map & WHITE) == 0)
continue;
/* and skip to end of whitespace */
for (;;)
{
c = this.in.readChar();
if (c == StreamIn.EndOfStream || c == '>')
{
this.in.ungetChar(c);
break;
}
map = MAP((char)c);
if ((map & WHITE) != 0)
continue;
this.in.ungetChar(c);
break;
}
break;
}
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
this.txtstart = this.lexsize;
continue;
}
else if (c == '[')
{
/* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
this.lexsize -= 2;
this.state = LEX_SECTION;
this.txtend = this.lexsize;
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
this.txtstart = this.lexsize;
continue;
}
/* otherwise swallow chars up to and including next '>' */
while (true)
{
c = this.in.readChar();
if (c == '>') break;
if (c == -1)
{
this.in.ungetChar(c);
break;
}
}
this.lexsize -= 2;
this.lexbuf[this.lexsize] = (byte)'\0';
this.state = LEX_CONTENT;
continue;
}
/*
processing instructions
*/
if (c == '?')
{
this.lexsize -= 2;
this.state = LEX_PROCINSTR;
this.txtend = this.lexsize;
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
this.txtstart = this.lexsize;
continue;
}
/* Microsoft ASP, e.g. <% ... server-code ... %> */
if (c == '%')
{
this.lexsize -= 2;
this.state = LEX_ASP;
this.txtend = this.lexsize;
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
this.txtstart = this.lexsize;
continue;
}
/* Netscape's JSTE, e.g. <# ... server-code ... #> */
if (c == '#')
{
this.lexsize -= 2;
this.state = LEX_JSTE;
this.txtend = this.lexsize;
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
this.txtstart = this.lexsize;
continue;
}
map = MAP((char)c);
/* check for start tag */
if ((map & LETTER) != 0)
{
this.in.ungetChar(c); /* push back letter */
this.lexsize -= 2; /* discard "<" + letter */
this.txtend = this.lexsize;
this.state = LEX_STARTTAG; /* ready to read tag name */
/* if some text before < return it now */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
continue; /* no text so keep going */
}
/* otherwise treat as CDATA */
this.state = LEX_CONTENT;
this.waswhite = false;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -