📄 lexer.java
字号:
else /* naked & */
{
Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
}
}
else
{
if (c != ';') /* issue warning if not terminated by ';' */
{
/* set error position just before offending character */
this.lines = this.in.curline;
this.columns = startcol;
Report.entityError(this, Report.MISSING_SEMICOLON, str, c);
}
this.lexsize = start;
if (ch == 160 && (mode & Preformatted) != 0)
ch = ' ';
addCharToLexer(ch);
if (ch == '&' && !this.configuration.QuoteAmpersand)
{
addCharToLexer('a');
addCharToLexer('m');
addCharToLexer('p');
addCharToLexer(';');
}
}
}
/*
 * Reads the remainder of a tag name from the input stream into the
 * lexer buffer.
 *
 * The byte already stored at lexbuf[txtstart] is treated as the first
 * character of the name. Unless configuration.XmlTags is set, every
 * character flagged UPPERCASE by MAP() is shifted by ('a' - 'A') before
 * it is stored. Reading stops at end of stream or at the first character
 * that MAP() does not flag as NAMECHAR; txtend is then set to the current
 * lexsize.
 *
 * Returns the character that ended the name, cast to char (this is the
 * cast of StreamIn.EndOfStream when the stream ran out).
 */
public char parseTagName()
{
    final boolean foldCase = !this.configuration.XmlTags;
    final int caseShift = (int)'a' - (int)'A';

    /* fold case of the character that is already in the buffer */
    int ch = this.lexbuf[this.txtstart];
    short flags = MAP((char)ch);
    if (foldCase && (flags & UPPERCASE) != 0)
    {
        ch += caseShift;
        this.lexbuf[this.txtstart] = (byte)ch;
    }

    /* consume name characters until the stream ends or a non-name char shows up */
    for (ch = this.in.readChar(); ch != StreamIn.EndOfStream; ch = this.in.readChar())
    {
        flags = MAP((char)ch);
        if ((flags & NAMECHAR) == 0)
        {
            break;
        }

        /* fold case of subsequent chars */
        if (foldCase && (flags & UPPERCASE) != 0)
        {
            ch += caseShift;
        }
        addCharToLexer(ch);
    }

    this.txtend = this.lexsize;
    return (char)ch;
}
/*
 * Appends every character of str, in order, to the lexer buffer by
 * calling addCharToLexer once per char.
 */
public void addStringLiteral(String str)
{
    int pos = 0;
    while (pos < str.length())
    {
        addCharToLexer((int)str.charAt(pos));
        ++pos;
    }
}
/*
 * Choose what version to use for a new doctype.
 *
 * Tests the bits of this.versions against the known version constants in
 * a fixed priority order (HTML 2.0, HTML 3.2, HTML 4.0 strict, HTML 4.0
 * loose, frames) and returns the first constant whose bits are present.
 * Returns Dict.VERS_UNKNOWN when none of them match.
 */
public short HTMLVersion()
{
    final short present = this.versions;

    /* candidates listed from highest to lowest priority */
    final short[] priority = {
        Dict.VERS_HTML20,
        Dict.VERS_HTML32,
        Dict.VERS_HTML40_STRICT,
        Dict.VERS_HTML40_LOOSE,
        Dict.VERS_FRAMES
    };

    for (int k = 0; k < priority.length; ++k)
    {
        if ((present & priority[k]) != 0)
        {
            return priority[k];
        }
    }
    return Dict.VERS_UNKNOWN;
}
/*
 * Returns the display name for the version reported by apparentVersion().
 *
 * The W3CVersion table is searched for the first entry whose code equals
 * that version. For the matching entry the voyagerName is returned when
 * this.isvoyager is set, otherwise the plain name. Returns null when no
 * entry matches.
 */
public String HTMLVersionName()
{
    final short version = apparentVersion();

    /* advance to the first table entry carrying this version code */
    int idx = 0;
    while (idx < W3CVersion.length && version != W3CVersion[idx].code)
    {
        ++idx;
    }

    if (idx >= W3CVersion.length)
    {
        return null;
    }
    if (this.isvoyager)
    {
        return W3CVersion[idx].voyagerName;
    }
    return W3CVersion[idx].name;
}
/*
 * Add meta element for Tidy.
 *
 * Looks up the HEAD element under root. If there is none, nothing is
 * changed and false is returned. If HEAD already contains a meta element
 * whose "name" attribute equals "generator" (compared with wstrcasecmp)
 * and whose "content" attribute starts with "HTML Tidy" (first 9 chars,
 * compared with wstrcasecmp), the document is left alone and false is
 * returned. Otherwise a new inferred meta element carrying the Tidy
 * generator attributes is inserted at the start of HEAD and true is
 * returned.
 */
public boolean addGenerator(Node root)
{
    final Node head = Node.findHEAD(root);
    if (head == null)
    {
        return false;
    }

    for (Node child = head.content; child != null; child = child.next)
    {
        if (child.tag != TagTable.tagMeta)
        {
            continue;
        }

        AttVal nameAttr = child.getAttrByName("name");
        if (nameAttr == null || nameAttr.value == null ||
            Lexer.wstrcasecmp(nameAttr.value, "generator") != 0)
        {
            continue;
        }

        /* a generator meta exists; see whether it already names Tidy */
        AttVal contentAttr = child.getAttrByName("content");
        if (contentAttr == null || contentAttr.value == null)
        {
            continue;
        }
        if (contentAttr.value.length() >= 9 &&
            Lexer.wstrcasecmp(contentAttr.value.substring(0, 9), "HTML Tidy") == 0)
        {
            return false;
        }
    }

    Node meta = this.inferredTag("meta");
    meta.addAttribute("content", "HTML Tidy, see www.w3.org");
    meta.addAttribute("name", "generator");
    Node.insertNodeAtStart(head, meta);
    return true;
}
/*
 * Return true if substring s is in p and isn't all in upper case.
 * This is used to check the case of SYSTEM, PUBLIC, DTD and EN.
 *
 * s   - the keyword in its expected (exact) spelling
 * p   - the text to scan
 * len - how many chars of p to check, counted from its start
 *
 * The first len characters of p are scanned left to right for the first
 * window that compares equal to s under wstrcasecmp (presumably a
 * case-insensitive comparison; confirm against its definition). The
 * result is true when that window differs from s in an exact comparison,
 * and false when it is identical or when no window matches at all.
 */
private static boolean findBadSubString(String s, String p, int len)
{
    final int n = s.length();

    /*
     * len may come from a byte count that is larger than the decoded
     * string, so never let a window run past the real end of p.
     */
    final int limit = Math.min(len, p.length());

    /* "<=" so that an occurrence ending exactly at the limit is still examined */
    for (int i = 0; i + n <= limit; ++i)
    {
        String window = p.substring(i, i + n);
        if (wstrcasecmp(s, window) == 0)
        {
            return !window.equals(s);
        }
    }
    return false;
}
/*
 * Checks the keywords inside a doctype node.
 *
 * The doctype text is taken from lexbuf between doctype.start and
 * doctype.end. Each of the keywords SYSTEM, PUBLIC, //DTD, //W3C and //EN
 * is passed to findBadSubString in that order; as soon as one of them is
 * reported as bad the method returns false. Returns true when none is.
 */
public boolean checkDocTypeKeyWords(Node doctype)
{
    final int len = doctype.end - doctype.start;
    final String text = getString(this.lexbuf, doctype.start, len);

    final String[] keywords = { "SYSTEM", "PUBLIC", "//DTD", "//W3C", "//EN" };
    for (int k = 0; k < keywords.length; ++k)
    {
        if (findBadSubString(keywords[k], text, len))
        {
            return false;
        }
    }
    return true;
}
/*
 * Examine <!DOCTYPE> to identify version.
 *
 * Steps, in order:
 *  - the first 5 chars of the doctype text must match "html " (via
 *    wstrcasecmp), otherwise 0 is returned;
 *  - a DTYPE_NOT_UPPER_CASE warning is reported when
 *    checkDocTypeKeyWords rejects the doctype;
 *  - the 7 chars after "html " are inspected: for "SYSTEM " the keyword
 *    is rewritten in upper case in lexbuf if needed and 0 is returned;
 *    for "PUBLIC " the keyword is rewritten in upper case if needed;
 *    anything else sets badDoctype;
 *  - the text after the first double quote is matched against the
 *    "-//W3C//DTD " and "-//IETF//DTD " prefixes, and the identifier that
 *    follows (up to the next '/') is looked up in W3CVersion: entries
 *    1.. for the W3C prefix, entry 0 only for the IETF prefix.
 *
 * Returns the code of the matching W3CVersion entry, or 0 when the
 * version is not recognised.
 */
public short findGivenVersion(Node doctype)
{
    final int base = doctype.start;

    /* if root tag for doctype isn't html give up now */
    String rootName = getString(this.lexbuf, base, 5);
    if (wstrcasecmp(rootName, "html ") != 0)
    {
        return 0;
    }

    if (!checkDocTypeKeyWords(doctype))
    {
        Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
    }

    final int keywordPos = base + 5;
    String keyword = getString(this.lexbuf, keywordPos, 7);

    /* give up if all we are given is the system id for the doctype */
    if (wstrcasecmp(keyword, "SYSTEM ") == 0)
    {
        /* but at least ensure the case is correct */
        if (!"SYSTEM".equals(keyword.substring(0, 6)))
        {
            System.arraycopy(getBytes("SYSTEM"), 0, this.lexbuf, keywordPos, 6);
        }
        return 0; /* unrecognized */
    }

    if (wstrcasecmp(keyword, "PUBLIC ") != 0)
    {
        this.badDoctype = true;
    }
    else if (!"PUBLIC".equals(keyword.substring(0, 6)))
    {
        System.arraycopy(getBytes("PUBLIC "), 0, this.lexbuf, keywordPos, 6);
    }

    /* find the first double quote inside the doctype text */
    int quote = base;
    while (quote < doctype.end && this.lexbuf[quote] != (byte)'"')
    {
        ++quote;
    }
    if (quote >= doctype.end)
    {
        return 0;
    }

    /* both prefixes are fetched up front, exactly as many chars as each needs */
    String w3cPrefix = getString(this.lexbuf, quote + 1, 12);
    String ietfPrefix = getString(this.lexbuf, quote + 1, 13);

    if (w3cPrefix.equals("-//W3C//DTD "))
    {
        /* compute length of identifier e.g. "HTML 4.0 Transitional" */
        final int idStart = quote + 13;
        int idEnd = idStart;
        while (idEnd < doctype.end && this.lexbuf[idEnd] != (byte)'/')
        {
            ++idEnd;
        }
        final int idLen = idEnd - idStart;
        String id = getString(this.lexbuf, idStart, idLen);

        for (int k = 1; k < W3CVersion.length; ++k)
        {
            String known = W3CVersion[k].name;
            if (idLen == known.length() && known.equals(id))
            {
                return W3CVersion[k].code;
            }
        }
        /* else unrecognized version */
    }
    else if (ietfPrefix.equals("-//IETF//DTD "))
    {
        /* compute length of identifier e.g. "HTML 2.0" */
        final int idStart = quote + 14;
        int idEnd = idStart;
        while (idEnd < doctype.end && this.lexbuf[idEnd] != (byte)'/')
        {
            ++idEnd;
        }
        final int idLen = idEnd - idStart;
        String id = getString(this.lexbuf, idStart, idLen);

        String known = W3CVersion[0].name;
        if (idLen == known.length() && known.equals(id))
        {
            return W3CVersion[0].code;
        }
        /* else unrecognized version */
    }

    return 0;
}
/*
 * Makes sure the html element carries an xmlns attribute equal to
 * profile.
 *
 * root    - node whose direct children are searched for the html element
 * profile - the namespace value the xmlns attribute must have
 *
 * Nothing happens when no child of root has the html tag. If the html
 * element already has an xmlns attribute whose value differs from
 * profile (or is missing), an INCONSISTENT_NAMESPACE warning is reported
 * and the value is replaced. If there is no xmlns attribute, a new one is
 * created and put at the front of the element's attribute list.
 */
public void fixHTMLNameSpace(Node root, String profile)
{
    Node node;
    AttVal attr;

    /* locate the html element among root's children */
    for (node = root.content;
         node != null && node.tag != TagTable.tagHtml; node = node.next);

    if (node == null)
    {
        return;
    }

    /*
     * Constant-first equals: an attribute entry whose name is null must
     * be skipped rather than abort the search with a NullPointerException.
     */
    for (attr = node.attributes; attr != null; attr = attr.next)
    {
        if ("xmlns".equals(attr.attribute))
        {
            break;
        }
    }

    if (attr != null)
    {
        /* an xmlns attribute without a value is inconsistent as well */
        if (attr.value == null || !attr.value.equals(profile))
        {
            Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
            attr.value = profile;
        }
    }
    else
    {
        /* new attribute is linked in front of the existing list */
        attr = new AttVal(node.attributes, null, (int)'"', "xmlns", profile);
        attr.dict =
            AttributeTable.getDefaultAttributeTable().findAttribute(attr);
        node.attributes = attr;
    }
}
public boolean setXHTMLDocType(Node root)
{
String fpi = " ";
String sysid = "";
String namespace = XHTML_NAMESPACE;
Node doctype;
doctype = root.findDocType();
if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
{
if (doctype != null)
Node.discardElement(doctype);
return true;
}
if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
{
/* see what flavor of XHTML this document matches */
if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
{ /* use XHTML strict */
fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
sysid = voyager_strict;
}
else if ((this.versions & Dict.VERS_LOOSE) != 0)
{
fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
sysid = voyager_loose;
}
else if ((this.versions & Dict.VERS_FRAMES) != 0)
{ /* use XHTML frames */
fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
sysid = voyager_frameset;
}
else /* lets assume XHTML transitional */
{
fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
sysid = voyager_loose;
}
}
else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
{
fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
sysid = voyager_strict;
}
else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
{
fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
sysid = voyager_loose;
}
fixHTMLNameSpace(root, namespace);
if (doctype == null)
{
doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
doctype.next = root.content;
doctype.parent = root;
doctype.prev = null;
root.content = doctype;
}
if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
configuration.docTypeStr != null)
{
fpi = configuration.docTypeStr;
sysid = "";
}
this.txtstart = this.lexsize;
this.txtend = this.lexsize;
/* add public identifier */
addStringLiteral("html PUBLIC ");
/* check if the fpi is quoted or not */
if (fpi.charAt(0) == '"')
addStringLiteral(fpi);
else
{
addStringLiteral("\"");
addStringLiteral(fpi);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -