htmlrepairer.java
来自「linux下建立JAVA虚拟机的源码KAFFE」· Java 代码 · 共 675 行 · 第 1/2 页
JAVA
675 行
break;
}
}
if (!isAtomTag) {
// check whether this open tag is equal to the topmost
// entry on the stack; if yes, emit a close tag first
// corrects stuff like '<tr><td>...<td>...');
if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) {
printWarning("Inserting </"+tagName+">");
output.append("</"+tagName+">");
tagStack.pop();
}
else {
processKnownChildTags(tagName, tagStack, output);
}
// otherwise, we assume there are no close tags required
// before this open tag.
tagStack.push(tagName);
output.append("<"+tagName+newAttributes+">");
}
else {
output.append("<"+tagName+newAttributes+"/>");
}
}
}
/**
 * Closes every open tag that sits above the nearest legal parent
 * of <code>tagName</code> on the tag stack.
 *
 * The stack is searched from the top (innermost open tag) down,
 * so that for nested structures such as a table inside a table
 * cell only the tags belonging to the innermost structure are
 * closed. A closing tag is appended to <code>output</code> and a
 * warning is reported for each tag popped.
 *
 * @param tagName  name of the tag that is about to be opened
 * @param tagStack stack of currently open tag names (Strings)
 * @param output   buffer receiving the inserted closing tags
 * @return <code>true</code> if tag info for <code>tagName</code>
 *         is registered and a legal parent was found on the stack
 *         (even if no tag had to be closed), <code>false</code>
 *         otherwise
 */
private boolean processKnownChildTags(String tagName, Stack tagStack, StringBuffer output) {
    TagInfo tagInfo = (TagInfo)tagInfoMap.get(tagName);
    if (null == tagInfo) {
        // nothing is known about this tag; leave the stack alone
        return false;
    }

    // Locate the innermost legal parent. Working with the index
    // (rather than comparing String references) also copes with
    // the same tag name occurring more than once on the stack.
    int parentIndex = -1;
    for (int index = tagStack.size() - 1; index >= 0; --index) {
        if (tagInfo.isLegalParentTag((String)tagStack.elementAt(index))) {
            parentIndex = index;
            break;
        }
    }
    if (parentIndex < 0) {
        return false;
    }

    // Pop until the legal parent is the topmost entry.
    while (tagStack.size() > parentIndex + 1) {
        String poppedTagName = (String)tagStack.pop();
        output.append("</"+poppedTagName+">");
        printWarning("Inserting </"+poppedTagName+">");
    }
    return true;
}
/**
 * Appends a closing tag to the output buffer for every tag still
 * on the tag stack, innermost first, reporting a warning for each
 * one. The tag stack is empty afterwards.
 */
private void flush() {
    for (int pending = tagStack.size(); pending > 0; --pending) {
        String closeTag = "</" + (String)tagStack.pop() + ">";
        printWarning("Inserting " + closeTag);
        output.append(closeTag);
    }
}
/**
 * Takes an HTML fragment and returns a repaired equivalent.
 *
 * The fragment is scanned character by character. Plain text is
 * passed to <code>haveText</code> and each tag to
 * <code>haveStartOrEndTag</code>; whatever those handlers write to
 * the output buffer is returned. Characters that cannot be markup
 * are escaped before being passed on:
 * <ul>
 * <li>a '&lt;' that does not start a tag becomes
 *     <code>&amp;lt;</code>,</li>
 * <li>a stray '&gt;' in text becomes <code>&amp;gt;</code>,</li>
 * <li>a '&amp;' that does not start an entity reference becomes
 *     <code>&amp;amp;</code>.</li>
 * </ul>
 *
 * An '&amp;' counts as the start of an entity reference if it is
 * followed by '#', or by a letter and then only name characters
 * up to a ';'. Tag names, attribute names and entity names are
 * not validated any further, so the result is not guaranteed to
 * be valid XHTML.
 *
 * The output buffer is cleared on entry. This method does not
 * call <code>flush</code>, so closing tags for still-open
 * elements are not added here.
 *
 * @param text the HTML fragment to repair; must not be null
 * @return the contents of the output buffer after processing
 */
public String getWellformedHTML(String text) {

    final int STATE_INITIAL         = 1;  // inside plain text
    final int STATE_TAG_START       = 2;  // just saw '<'
    final int STATE_TAG             = 3;  // inside a tag
    final int STATE_TAG_DOUBLEQUOTE = 4;  // inside "..." within a tag
    final int STATE_TAG_SINGLEQUOTE = 5;  // inside '...' within a tag
    final int STATE_AMP             = 6;  // just saw '&'

    int state = STATE_INITIAL;

    output.setLength(0);

    StringBuffer buf = new StringBuffer();
    char[] textAsChars = text.toCharArray();

    // One extra iteration with c == -1 signals end of input so
    // that every state can finish off whatever it has collected.
    for (int i=0, ilim=textAsChars.length+1; i<ilim; ++i) {
        int c;
        if (i<textAsChars.length) {
            c = textAsChars[i];
        }
        else {
            c = -1;
        }

        switch (state) {

        case STATE_INITIAL:
            if ('<'==c) {
                state = STATE_TAG_START;
                if (buf.length()>0) {
                    haveText(buf.toString());
                    buf.setLength(0);
                }
            }
            else if ('>'==c) {
                // no tag is open, so this is a greater-than sign
                buf.append("&gt;");
            }
            else if ('&'==c) {
                state = STATE_AMP;
            }
            else if (-1==c) {
                if (buf.length()>0) {
                    haveText(buf.toString());
                    buf.setLength(0);
                }
            }
            else {
                buf.append((char)c);
            }
            break;

        case STATE_AMP:
            if ('<'==c) {
                // the ampersand was literal; a tag may start here
                buf.append("&amp;");
                state = STATE_TAG_START;
                haveText(buf.toString());
                buf.setLength(0);
            }
            else if ('>'==c) {
                // literal ampersand followed by a greater-than sign
                buf.append("&amp;");
                buf.append("&gt;");
                state = STATE_INITIAL;
            }
            else if ('&'==c) {
                // two literal ampersands
                buf.append("&amp;");
                buf.append("&amp;");
                state = STATE_INITIAL;
            }
            else if (-1==c) {
                // literal ampersand at end of input
                buf.append("&amp;");
                haveText(buf.toString());
                buf.setLength(0);
                state = STATE_INITIAL;
            }
            else if ('#'==c) {
                // character reference; passed through unchecked
                buf.append("&");
                buf.append((char)c);
                state = STATE_INITIAL;
            }
            else {
                // Peek forward: this is an entity reference only
                // if a ';' is reached before any character that
                // cannot be part of a name.
                boolean isEntity = false;
                if (Character.isLetter((char)c)) {
                    for (int i2=i+1; i2<textAsChars.length; i2++) {
                        char next = textAsChars[i2];
                        if (';' == next) {
                            isEntity = true;
                            break;
                        }
                        if (!Character.isLetter(next)
                            && !Character.isDigit(next)
                            && ".-_:".indexOf(next) < 0
                            // FIXME: combining chars and extenders
                            // are not recognized as name chars
                            ) {
                            break;
                        }
                    }
                }
                // not a valid entity reference: escape the ampersand
                buf.append(isEntity ? "&" : "&amp;");
                buf.append((char)c);
                state = STATE_INITIAL;
            }
            break;

        case STATE_TAG_START:
            if (-1==c) {
                // input ends right after '<': it was a less-sign
                haveText("&lt;");
                state = STATE_INITIAL;
            }
            else if ('/'==c) {
                buf.append((char)c);
                state = STATE_TAG;
            }
            else if ('<'==c) {
                // assume these are two less-signs
                haveText("&lt;&lt;");
                state = STATE_INITIAL;
            }
            else if ('>'==c) {
                // assume this is a less-sign and a greater-than sign
                haveText("&lt;&gt;");
                state = STATE_INITIAL;
            }
            else if ('&'==c) {
                // less-sign followed by a possible entity reference
                haveText("&lt;");
                state = STATE_AMP;
            }
            else if (!Character.isLetter((char)c)) {
                // Tag names start with a letter, so anything else
                // (including whitespace) means the '<' was a
                // less-sign. Emit the character itself, not its
                // numeric code.
                haveText("&lt;"+(char)c);
                state = STATE_INITIAL;
            }
            else {
                buf.append((char)c);
                state = STATE_TAG;
            }
            break;

        case STATE_TAG:
            if ('\"'==c) {
                buf.append((char)c);
                state = STATE_TAG_DOUBLEQUOTE;
            }
            else if ('\''==c) {
                buf.append((char)c);
                state = STATE_TAG_SINGLEQUOTE;
            }
            else if ('>'==c) {
                state = STATE_INITIAL;
                haveStartOrEndTag(buf.toString());
                buf.setLength(0);
            }
            else if ('<'==c) {
                // Missing greater-than sign: finish the current
                // tag and stay in this state, since the '<' just
                // read opens the next tag.
                haveStartOrEndTag(buf.toString());
                buf.setLength(0);
            }
            else if (-1==c) {
                printWarning("Unclosed tag at end-of-comment: <"+buf);
                haveStartOrEndTag(buf.toString());
                buf.setLength(0);
            }
            else {
                buf.append((char)c);
            }
            break;

        case STATE_TAG_DOUBLEQUOTE:
            if ('\"'==c) {
                buf.append((char)c);
                state = STATE_TAG;
            }
            else if (-1==c) {
                // supply the missing closing quote
                printWarning("Unclosed attribute value at end-of-comment.");
                haveStartOrEndTag(buf.toString()+"\"");
            }
            else {
                buf.append((char)c);
            }
            break;

        case STATE_TAG_SINGLEQUOTE:
            if ('\''==c) {
                buf.append((char)c);
                state = STATE_TAG;
            }
            else if (-1==c) {
                // supply the missing closing quote
                printWarning("Unclosed attribute value at end-of-comment.");
                haveStartOrEndTag(buf.toString()+"'");
            }
            else {
                buf.append((char)c);
            }
            break;
        }
    }

    return output.toString();
}
/**
 * Builds a description of the current context for use in warning
 * messages.
 *
 * @return the qualified type name of the context class, followed
 *         by a dot and the context member if one is set, or
 *         <code>null</code> if no context class is set
 */
private String getContext() {
    if (null == contextClass) {
        return null;
    }

    StringBuffer description = new StringBuffer();
    description.append(contextClass.qualifiedTypeName());
    if (null != contextMember) {
        description.append('.').append(contextMember.toString());
    }
    return description.toString();
}
/**
 * Reports a warning through the warning reporter, prefixed with
 * the current context, or with "In overview page" when there is
 * no context. Does nothing if no warning reporter is set or
 * warnings are disabled via <code>noWarn</code>.
 *
 * @param msg the warning text
 */
private void printWarning(String msg) {
    if (null == warningReporter || noWarn) {
        return;
    }

    // Look the context up once and reuse it, so that the value
    // printed is the one that was checked against null.
    String context = getContext();
    if (null != context) {
        warningReporter.printWarning("In "+context+": "+msg);
    }
    else {
        warningReporter.printWarning("In overview page: "+msg);
    }
}
/**
 * Closes all tags that are still open.
 *
 * @return the closing tags inserted for the pending open tags,
 *         or an empty String if none were pending
 */
public String terminateText() {
    // start from an empty buffer so only the closing tags are returned
    output.setLength(0);
    flush();
    final String closingTags = output.toString();
    return closingTags;
}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?