📄 lexer.java
字号:
addStringLiteral("\"");
}
if (sysid.length() + 6 >= this.configuration.wraplen)
addStringLiteral("\n\"");
else
addStringLiteral("\n \"");
/* add system identifier */
addStringLiteral(sysid);
addStringLiteral("\"");
this.txtend = this.lexsize;
doctype.start = this.txtstart;
doctype.end = this.txtend;
return false;
}
/**
 * Determines the HTML version the document appears to be written in.
 *
 * When no doctype version was recorded (VERS_UNKNOWN) the result of
 * HTMLVersion() is returned directly. When the recorded doctype version is
 * one of the known versions and its bit is set in this.versions, that
 * version is returned. In every other case an INCONSISTENT_VERSION warning
 * is reported and the result of HTMLVersion() is returned instead.
 *
 * @return the apparent version as one of the Dict.VERS_* codes, or whatever
 *         HTMLVersion() yields
 */
public short apparentVersion()
{
    /* nothing declared: no consistency check and no warning */
    if (this.doctype == Dict.VERS_UNKNOWN)
    {
        return HTMLVersion();
    }

    /* the declared version is accepted only if its bit is set in versions */
    if (this.doctype == Dict.VERS_HTML20
        && (this.versions & Dict.VERS_HTML20) != 0)
    {
        return Dict.VERS_HTML20;
    }

    if (this.doctype == Dict.VERS_HTML32
        && (this.versions & Dict.VERS_HTML32) != 0)
    {
        return Dict.VERS_HTML32;
    }

    if (this.doctype == Dict.VERS_HTML40_STRICT
        && (this.versions & Dict.VERS_HTML40_STRICT) != 0)
    {
        return Dict.VERS_HTML40_STRICT;
    }

    if (this.doctype == Dict.VERS_HTML40_LOOSE
        && (this.versions & Dict.VERS_HTML40_LOOSE) != 0)
    {
        return Dict.VERS_HTML40_LOOSE;
    }

    if (this.doctype == Dict.VERS_FRAMES
        && (this.versions & Dict.VERS_FRAMES) != 0)
    {
        return Dict.VERS_FRAMES;
    }

    /* declared version is unrecognised or its bit is not set: warn and re-derive */
    Report.warning(this, null, null, Report.INCONSISTENT_VERSION);
    return this.HTMLVersion();
}
/**
 * Fixes up the document type declaration of the document rooted at
 * {@code root}.
 *
 * Behaviour by configuration.docTypeMode:
 *   DOCTYPE_OMIT   - any existing DOCTYPE node is discarded, nothing is added.
 *   DOCTYPE_STRICT - the existing node is discarded and an HTML 4.0 strict
 *                    public identifier is written.
 *   DOCTYPE_LOOSE  - the existing node is discarded and an HTML 4.0 loose
 *                    public identifier is written.
 *   DOCTYPE_AUTO   - an existing DOCTYPE is kept when its version bit is set
 *                    in this.versions; otherwise the version comes from
 *                    HTMLVersion().
 *   DOCTYPE_USER   - configuration.docTypeStr (when non-null) is written
 *                    after "html PUBLIC ", reusing an existing DOCTYPE node.
 *
 * When XmlTags or isvoyager is set, the DOCTYPE node is discarded and
 * fixHTMLNameSpace() is called with the matching W3CVersion profile.
 *
 * @param root root node whose children are searched for a DOCTYPE node and
 *             which receives a new one when none exists
 * @return false when a DOCTYPE exists in auto mode but the recorded version
 *         is VERS_UNKNOWN, or when the version to use is VERS_UNKNOWN;
 *         true otherwise
 */
public boolean fixDocType(Node root)
{
    Node doctype;
    int guessed = Dict.VERS_HTML40_STRICT, i;

    if (this.badDoctype)
    {
        Report.warning(this, null, null, Report.MALFORMED_DOCTYPE);
    }

    if (configuration.XmlOut)
    {
        return true;
    }

    doctype = root.findDocType();

    if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
    {
        if (doctype != null)
        {
            Node.discardElement(doctype);
        }
        return true;
    }

    if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
    {
        /* guard against a missing DOCTYPE, as the omit and XML branches do */
        if (doctype != null)
        {
            Node.discardElement(doctype);
        }
        doctype = null;
        guessed = Dict.VERS_HTML40_STRICT;
    }
    else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
    {
        if (doctype != null)
        {
            Node.discardElement(doctype);
        }
        doctype = null;
        guessed = Dict.VERS_HTML40_LOOSE;
    }
    else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
    {
        if (doctype != null)
        {
            /* keep the existing DOCTYPE when it agrees with the content */
            switch (this.doctype)
            {
            case Dict.VERS_UNKNOWN:
                return false;

            case Dict.VERS_HTML20:
                if ((this.versions & Dict.VERS_HTML20) != 0)
                    return true;
                break; /* to replace old version by new */

            case Dict.VERS_HTML32:
                if ((this.versions & Dict.VERS_HTML32) != 0)
                    return true;
                break; /* to replace old version by new */

            case Dict.VERS_HTML40_STRICT:
                if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
                    return true;
                break; /* to replace old version by new */

            case Dict.VERS_HTML40_LOOSE:
                if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
                    return true;
                break; /* to replace old version by new */

            case Dict.VERS_FRAMES:
                if ((this.versions & Dict.VERS_FRAMES) != 0)
                    return true;
                break; /* to replace old version by new */
            }

            /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
        }

        /* choose new doctype */
        guessed = HTMLVersion();
    }

    if (guessed == Dict.VERS_UNKNOWN)
    {
        return false;
    }

    /* for XML use the Voyager system identifier */
    if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager)
    {
        if (doctype != null)
        {
            Node.discardElement(doctype);
        }

        for (i = 0; i < W3CVersion.length; ++i)
        {
            if (guessed == W3CVersion[i].code)
            {
                fixHTMLNameSpace(root, W3CVersion[i].profile);
                break;
            }
        }

        return true;
    }

    if (doctype == null)
    {
        /* insert a fresh, empty DOCTYPE node as the first child of root */
        doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
        doctype.next = root.content;
        doctype.parent = root;
        doctype.prev = null;

        /* keep the sibling list doubly linked: old first child points back */
        if (root.content != null)
        {
            root.content.prev = doctype;
        }

        root.content = doctype;
    }

    /* the declaration text is appended to the lexer buffer */
    this.txtstart = this.lexsize;
    this.txtend = this.lexsize;

    /* use the appropriate public identifier */
    addStringLiteral("html PUBLIC ");

    if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
        configuration.docTypeStr != null)
    {
        addStringLiteral(configuration.docTypeStr);
    }
    else if (guessed == Dict.VERS_HTML20)
    {
        addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
    }
    else
    {
        addStringLiteral("\"-//W3C//DTD ");

        for (i = 0; i < W3CVersion.length; ++i)
        {
            if (guessed == W3CVersion[i].code)
            {
                addStringLiteral(W3CVersion[i].name);
                break;
            }
        }

        addStringLiteral("//EN\"");
    }

    this.txtend = this.lexsize;

    /* point the DOCTYPE node at the text just written */
    doctype.start = this.txtstart;
    doctype.end = this.txtend;

    return true;
}
/**
 * Ensures the document starts with an {@code <?xml version="1.0"?>}
 * processing instruction.
 *
 * If the first child of {@code root} is already a processing instruction
 * whose text starts with "xml", nothing is changed. Otherwise a new
 * processing-instruction node is inserted as the first child of
 * {@code root}. Its text is {@code xml version="1.0"}, followed by
 * {@code encoding="ISO-8859-1"} when configuration.CharEncoding is LATIN1.
 *
 * @param root root node whose first child is inspected and possibly preceded
 *             by the new node
 * @return true if an xml processing instruction was already present,
 *         false if one had to be inserted
 */
public boolean fixXMLPI(Node root)
{
    Node xml;
    Node first = root.content;
    int s;

    if (first != null && first.type == Node.ProcInsTag)
    {
        s = first.start;

        /* only probe for "xml" when the PI text really has three bytes */
        if (first.end - s >= 3 &&
            this.lexbuf[s] == (byte)'x' &&
            this.lexbuf[s+1] == (byte)'m' &&
            this.lexbuf[s+2] == (byte)'l')
        {
            return true;
        }
    }

    /* link the new node in front of the current first child */
    xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0);
    xml.parent = root;
    xml.prev = null;
    xml.next = first;

    if (first != null)
    {
        first.prev = xml;
    }

    root.content = xml;

    /* the declaration text is appended to the lexer buffer */
    this.txtstart = this.lexsize;
    this.txtend = this.lexsize;
    addStringLiteral("xml version=\"1.0\"");

    if (this.configuration.CharEncoding == Configuration.LATIN1)
    {
        addStringLiteral(" encoding=\"ISO-8859-1\"");
    }

    this.txtend = this.lexsize;

    xml.start = this.txtstart;
    xml.end = this.txtend;

    return false;
}
/**
 * Creates a start-tag node for an element that was inferred rather than
 * read from the input.
 *
 * The node is built over the lexer buffer with the current
 * txtstart/txtend span and is flagged as implicit.
 *
 * @param name element name for the new node; a copy of the string is
 *             handed to the node
 * @return the newly created start-tag node with {@code implicit} set
 */
public Node inferredTag(String name)
{
    /* the node receives its own copy of the element name */
    final String elementName = new String(name);

    final Node inferred =
        newNode(Node.StartTag, this.lexbuf, this.txtstart, this.txtend, elementName);
    inferred.implicit = true;

    return inferred;
}
/**
 * Tells whether a node is a start tag that may have content.
 *
 * Only start tags qualify. A start tag with no dictionary entry (an unknown
 * element) is treated as expecting content. A known element expects content
 * unless its content model has the CM_EMPTY bit set.
 *
 * @param node node to examine
 * @return true if {@code node} is a start tag whose element is unknown or
 *         not declared empty
 */
public static boolean expectsContent(Node node)
{
    return node.type == Node.StartTag
        && (node.tag == null /* unknown element? */
            || (node.tag.model & Dict.CM_EMPTY) == 0);
}
/*
create a text node for the contents of
a CDATA element like style or script
which ends with </foo> for some foo.

Characters are read from this.in and appended to the lexer buffer until
either the end of the stream is reached or a "</name>" sequence is seen
whose name has the same length as container.element and for which
Lexer.wstrcasecmp() returns 0.

container: the element whose raw content is being read; its element name
identifies the closing tag.

Returns a TextNode covering txtstart..txtend (also stored in this.token),
or null when no content was collected.
*/
public Node getCDATA(Node container)
{
int c, lastc, start, len, i;
String str;
boolean endtag = false;
/* remember the input position at which the content starts */
this.lines = this.in.curline;
this.columns = this.in.curcol;
this.waswhite = false;
/* the text span starts empty at the current end of the lexer buffer */
this.txtstart = this.lexsize;
this.txtend = this.lexsize;
lastc = (int)'\0';
/* start is the buffer index of the first letter after "</", or -1 when no "</" is pending */
start = -1;
while (true)
{
c = this.in.readChar();
if (c == StreamIn.EndOfStream) break;
/* "</" seen: this may be the start of the closing tag */
if (c == (int)'/' && lastc == (int)'<')
{
/* NOTE(review): endtag is never reset to false, so every "</" after the first one is reported as bad CDATA content - confirm this is intended */
if (endtag)
{
this.lines = this.in.curline;
this.columns = this.in.curcol - 3;
Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
}
/* the '/' itself is appended below at index lexsize, so the name begins one past it */
start = this.lexsize + 1; /* to first letter */
endtag = true;
}
/* '>' seen while a "</" is pending: check whether the name matches the container */
else if (c == (int)'>' && start >= 0)
{
len = this.lexsize - start;
if (len == container.element.length())
{
str = getString( this.lexbuf, start, len );
if (Lexer.wstrcasecmp(str, container.element) == 0)
{
/* start - 2 is the index of the '<', so the text ends just before "</name"; the '>' is never appended */
this.txtend = start - 2;
break;
}
}
/* a closing tag for some other element inside the content */
this.lines = this.in.curline;
this.columns = this.in.curcol - 3;
Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
/* if javascript insert backslash before / */
if (ParserImpl.isJavaScript(container))
{
/* shift everything from the '/' onwards one slot to the right to make room for the backslash */
/* NOTE(review): this writes lexbuf[lexsize] without growing the buffer - assumes a spare slot is always available; confirm against addCharToLexer */
for (i = this.lexsize; i > start-1; --i)
this.lexbuf[i] = this.lexbuf[i-1];
this.lexbuf[start-1] = (byte)'\\';
this.lexsize++;
}
start = -1;
}
/* treat \r\n as \n and \r as \n */
else if (c == (int)'\r')
{
c = this.in.readChar();
/* a lone \r: push the following character back so it is read again */
if (c != (int)'\n')
this.in.ungetChar(c);
c = (int)'\n';
}
/* append the character and extend the text span to cover it */
addCharToLexer((int)c);
this.txtend = this.lexsize;
lastc = c;
}
/* the input ended before the closing tag was found */
if (c == StreamIn.EndOfStream)
Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR);
/* only create a node when some content was collected */
if (this.txtend > this.txtstart)
{
this.token = newNode(Node.TextNode,
this.lexbuf,
this.txtstart,
this.txtend);
return this.token;
}
return null;
}
/**
 * Marks the current token as pushed back by raising the {@code pushed} flag.
 *
 * NOTE(review): presumably the token reader checks this flag and delivers
 * the same token again on its next call - confirm against getToken().
 */
public void ungetToken()
{
    pushed = true;
}
/* Mode values passed as the short mode argument of getToken(). */
/* NOTE(review): the name suggests whitespace is skipped in this mode - confirm in getToken() */
public static final short IgnoreWhitespace = 0;
/* for elements which don't accept PCDATA */
public static final short MixedContent = 1;
/* white space preserved as is */
public static final short Preformatted = 2;
/* for CDATA elements such as script, style */
public static final short IgnoreMarkup = 3;
/*
modes for GetToken()
MixedContent -- for elements which don't accept PCDATA
Preformatted -- white space preserved as is
IgnoreMarkup -- for CDATA elements such as script, style
*/
public Node getToken(short mode)
{
short map;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -