📄 clean.java
字号:
Node child;
if (node.content != null)
{
for (child = node.content;
child != null; child = child.next)
{
defineStyleRules(lexer, child);
}
}
style2Rule(lexer, node);
}
/*
Entry point of the style clean-up pass.

First passes the tree through createStyleProperties and keeps working
on whatever node that call returns. Then, only when
lexer.configuration.MakeClean is false, calls defineStyleRules followed
by createStyleElement on that node.

NOTE(review): the rule/style-element step runs when MakeClean is *off*;
confirm against the callers that this negation is intended.
*/
public void cleanTree(Lexer lexer, Node doc)
{
    Node processed = createStyleProperties(lexer, doc);
    if (lexer.configuration.MakeClean)
    {
        return;
    }
    defineStyleRules(lexer, processed);
    createStyleElement(lexer, processed);
}
/*
Simplifies <b><b> ... </b> ...</b> etc.

Walks node and its following siblings. A b or i element whose parent
carries the same tag is handed to discardContainer; the walk then resumes
from whatever node discardContainer leaves in the holder object (which is
pre-loaded with the element's next sibling). Any other node is descended
into recursively.
*/
public static void nestedEmphasis(Node node)
{
    MutableObject holder = new MutableObject();
    Node current = node;
    while (current != null)
    {
        /* remember the sibling before the current node can be unlinked */
        Node following = current.next;
        boolean isEmphasis =
            current.tag == TagTable.tagB || current.tag == TagTable.tagI;
        if (isEmphasis
            && current.parent != null
            && current.parent.tag == current.tag)
        {
            /* strip redundant inner element */
            holder.setObject(following);
            discardContainer(current, holder);
            current = (Node) holder.getObject();
            continue;
        }
        if (current.content != null)
        {
            nestedEmphasis(current.content);
        }
        current = following;
    }
}
/*
Replace i by em and b by strong.

Visits node, each of its following siblings and, recursively, their
children. For every element tagged i or b both the element name and the
tag reference are switched to the em / strong entry of TagTable.
*/
public static void emFromI(Node node)
{
    for (Node current = node; current != null; current = current.next)
    {
        if (current.tag == TagTable.tagI)
        {
            current.element = new String(TagTable.tagEm.name);
            current.tag = TagTable.tagEm;
        }
        else if (current.tag == TagTable.tagB)
        {
            current.element = new String(TagTable.tagStrong.name);
            current.tag = TagTable.tagStrong;
        }

        if (current.content != null)
        {
            emFromI(current.content);
        }
    }
}
/*
Some people use dir or ul without an li to indent the content.
The pattern to look for is a list with a single implicit li.
This is recursively replaced by an implicit blockquote.

Children are processed before their parent, so inner lists have already
been rewritten by the time the enclosing node is examined.
*/
public static void list2BQ(Node node)
{
    for (Node current = node; current != null; current = current.next)
    {
        if (current.content != null)
        {
            list2BQ(current.content);
        }

        /* only elements handled by the list parser are candidates */
        if (current.tag == null
            || current.tag.parser != ParserImpl.getParseList())
        {
            continue;
        }

        /* ...and only when the sole child is an implicit one */
        if (!current.hasOneChild() || !current.content.implicit)
        {
            continue;
        }

        stripOnlyChild(current);
        current.element = new String(TagTable.tagBlockquote.name);
        current.tag = TagTable.tagBlockquote;
        current.implicit = true;
    }
}
/*
Replace implicit blockquote by div with an indent
taking care to reduce nested blockquotes to a single
div with the indent set to match the nesting depth.

Walks node and its following siblings. For each implicit blockquote the
chain of only-child blockquotes below it is collapsed with
stripOnlyChild, counting one indent level per collapsed layer; the
remaining children are processed recursively and the node is turned into
a div carrying style="margin-left: <2 * depth>em". All other nodes are
simply descended into.
*/
public static void bQ2Div(Node node)
{
    while (node != null)
    {
        if (node.tag == TagTable.tagBlockquote && node.implicit)
        {
            int indent = 1;

            /*
            NOTE(review): the third operand tests node.implicit, which is
            already known to be true here; possibly node.content.implicit
            was meant. Left unchanged - confirm before altering.
            */
            while (node.hasOneChild() &&
                   node.content.tag == TagTable.tagBlockquote &&
                   node.implicit)
            {
                ++indent;
                stripOnlyChild(node);
            }

            if (node.content != null)
            {
                bQ2Div(node.content);
            }

            /* plain concatenation replaces the deprecated Integer constructor */
            String indentStyle = "margin-left: " + (2 * indent) + "em";

            node.element = new String(TagTable.tagDiv.name);
            node.tag = TagTable.tagDiv;
            node.addAttribute("style", indentStyle);
        }
        else if (node.content != null)
        {
            bQ2Div(node.content);
        }

        node = node.next;
    }
}
/*
node is <![if ...]> prune up to <![endif]>

Discards the opening section tag and every following sibling up to and
including the matching <![endif]>. Nested <![if ...]> sections are pruned
recursively.

Returns the sibling that follows the matching <![endif]>, or null when
the sibling list ends first.

The node handed back by a nested prune is examined before anything else
is discarded. Previously it was discarded unconditionally, so an outer
<![endif]> placed directly after an inner one was not recognised and
pruning continued into the siblings behind it.
*/
public static Node pruneSection(Lexer lexer, Node node)
{
    /* discard the opening <![if ...]>; discardElement returns the next node */
    node = Node.discardElement(node);

    while (node != null)
    {
        if (node.type == Node.SectionTag)
        {
            if ((Lexer.getString(node.textarray, node.start, 2)).equals("if"))
            {
                /* nested section: prune it, then inspect what follows it */
                node = pruneSection(lexer, node);
                continue;
            }

            if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif"))
            {
                /* matching terminator: drop it and hand back its successor */
                return Node.discardElement(node);
            }
        }

        /* ordinary content inside the section */
        node = Node.discardElement(node);
    }

    return null;
}
/*
Removes every section tag from node, its following siblings and,
recursively, their children.

A section tag whose text starts with "if" is removed together with
everything up to its matching endif (see pruneSection); any other
section tag is discarded on its own.
*/
public static void dropSections(Lexer lexer, Node node)
{
    Node current = node;
    while (current != null)
    {
        if (current.type != Node.SectionTag)
        {
            if (current.content != null)
            {
                dropSections(lexer, current.content);
            }
            current = current.next;
            continue;
        }

        boolean opensConditional =
            (Lexer.getString(current.textarray, current.start, 2)).equals("if");

        /* both calls return the node at which the walk should resume */
        current = opensConditional
            ? pruneSection(lexer, current)
            : Node.discardElement(current);
    }
}
/*
Unlinks presentational attributes from node's attribute list.

Removed: class, style, lang, anything whose name starts with "x:", and
height / width when node is a td, tr or th.
Kept: class="Code" (it marks pre text) and every other attribute.
*/
public static void purgeAttributes(Node node)
{
    boolean tableCell = node.tag == TagTable.tagTd
        || node.tag == TagTable.tagTr
        || node.tag == TagTable.tagTh;

    /* last attribute that stays in the list; null while none has been kept */
    AttVal kept = null;
    AttVal following;

    for (AttVal attr = node.attributes; attr != null; attr = following)
    {
        following = attr.next;
        String name = attr.attribute;

        /* special check for class="Code" denoting pre text */
        boolean codeClass = name != null
            && attr.value != null
            && name.equals("class")
            && attr.value.equals("Code");

        boolean purge = !codeClass
            && name != null
            && (name.equals("class")
                || name.equals("style")
                || name.equals("lang")
                || name.startsWith("x:")
                || ((name.equals("height") || name.equals("width")) && tableCell));

        if (!purge)
        {
            kept = attr;
        }
        else if (kept == null)
        {
            node.attributes = following;
        }
        else
        {
            kept.next = following;
        }
    }
}
/*
Word2000 uses span excessively, so we strip span out.

Runs cleanWord2000 over the span's children, moves every remaining child
out of the span (using Node.removeNode followed by
Node.insertNodeBeforeElement / Node.insertNodeAfterElement), then
discards the emptied span.

Returns span.next as read just before the span is discarded; null when
the span had no following sibling.
*/
public static Node stripSpan(Lexer lexer, Node span)
{
Node node;
/* node after which the next child is inserted */
Node prev = null;
Node content;
/*
deal with span elements that have content
by splicing the content in place of the span
after having processed it
*/
cleanWord2000(lexer, span.content);
/* read only after cleaning, which can discard nodes from the child list */
content = span.content;
if (span.prev != null)
prev = span.prev;
else if (content != null)
{
/* no previous sibling to anchor on: put the first child before the span */
node = content;
content = content.next;
Node.removeNode(node);
Node.insertNodeBeforeElement(span, node);
prev = node;
}
/* chain the remaining children one after another behind prev */
while (content != null)
{
node = content;
/* advance before unlinking the current child */
content = content.next;
Node.removeNode(node);
Node.insertNodeAfterElement(prev, node);
prev = node;
}
/*
span has no following sibling: record prev as the parent's last child.
NOTE(review): assumes span.parent is non-null here - confirm.
*/
if (span.next == null)
span.parent.last = prev;
node = span.next;
/* clear the child pointer before the span itself is discarded */
span.content = null;
Node.discardElement(span);
return node;
}
/*
Map non-breaking spaces to regular spaces.

Walks node, its following siblings and, recursively, their children.
In every text node the characters between node.start and node.end are
decoded, U+00A0 (160) is replaced by ' ', and the result is written back
in place starting at node.start.

Two corrections over the earlier version:
 - the array element is masked with 0xFF before the "> 0x7F" test;
   without the mask a signed byte is never greater than 0x7F, so
   multibyte characters (and hence non-breaking spaces) were never
   decoded. Assumes textarray is a byte[] holding UTF-8 - TODO confirm.
 - node.end is moved to the final write position, because replacing a
   multibyte non-breaking space by a single-byte space shortens the text.
*/
private static void normalizeSpaces(Lexer lexer, Node node)
{
    while (node != null)
    {
        if (node.content != null)
        {
            normalizeSpaces(lexer, node.content);
        }

        if (node.type == Node.TextNode)
        {
            MutableInteger c = new MutableInteger();
            /* write position; never runs ahead of the read position i */
            int p = node.start;

            for (int i = node.start; i < node.end; ++i)
            {
                /* treat the element as unsigned */
                c.value = node.textarray[i] & 0xFF;

                /* look for UTF-8 multibyte character */
                if (c.value > 0x7F)
                {
                    i += PPrint.getUTF8(node.textarray, i, c);
                }

                if (c.value == 160)
                {
                    c.value = ' ';
                }

                p = PPrint.putUTF8(node.textarray, p, c.value);
            }

            /* drop the bytes left over after the compacted text */
            node.end = p;
        }

        node = node.next;
    }
}
/*
This is a major clean up to strip out all the extra stuff you get
when you save as web page from Word 2000. It doesn't yet know what
to do with VML tags, but these will appear as errors unless you
declare them as new tags, such as o:p which needs to be declared
as inline.

Walks node and its following siblings, recursing into children:
 - style, meta and comment nodes are discarded;
 - span elements are replaced by their content (stripSpan);
 - an html element without an xmlns:o attribute ends the clean up;
 - link rel="File-List" and empty p elements are discarded;
 - runs of p class="MsoListBullet" are gathered into an inferred ul;
 - runs of p class="Code" are gathered into an inferred pre;
 - remaining start tags have their attributes purged.

After a paragraph has been moved into the ul / pre, the walk resumes at
the sibling following that list through the top of the loop. Previously
control fell through to the generic tail, which dereferenced a possibly
null node, purged the next sibling's class attribute before it could be
recognised as another list item, and then skipped over it.
*/
public static void cleanWord2000(Lexer lexer, Node node)
{
    /* used to build a list from a sequence of bulleted p's */
    Node list = null;

    while (node != null)
    {
        /* discard Word's style verbiage */
        if (node.tag == TagTable.tagStyle ||
            node.tag == TagTable.tagMeta ||
            node.type == Node.CommentTag)
        {
            node = Node.discardElement(node);
            continue;
        }

        /* strip out all span tags Word scatters so liberally! */
        if (node.tag == TagTable.tagSpan)
        {
            node = stripSpan(lexer, node);
            continue;
        }

        if (node.tag == TagTable.tagHtml)
        {
            /* check that it's a Word 2000 document */
            if (node.getAttrByName("xmlns:o") == null)
            {
                return;
            }
        }

        if (node.tag == TagTable.tagLink)
        {
            AttVal attr = node.getAttrByName("rel");

            if (attr != null && attr.value != null &&
                attr.value.equals("File-List"))
            {
                node = Node.discardElement(node);
                continue;
            }
        }

        /* discard empty paragraphs */
        if (node.content == null && node.tag == TagTable.tagP)
        {
            node = Node.discardElement(node);
            continue;
        }

        if (node.tag == TagTable.tagP)
        {
            AttVal attr = node.getAttrByName("class");

            /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
            if (attr != null && attr.value != null &&
                attr.value.equals("MsoListBullet"))
            {
                Node.coerceNode(lexer, node, TagTable.tagLi);

                if (list == null || list.tag != TagTable.tagUl)
                {
                    list = lexer.inferredTag("ul");
                    Node.insertNodeBeforeElement(node, list);
                }

                purgeAttributes(node);

                if (node.content != null)
                {
                    cleanWord2000(lexer, node.content);
                }

                /* remove node and append to contents of list */
                Node.removeNode(node);
                Node.insertNodeAtEnd(list, node);

                /* resume with the sibling after the list, from the top */
                node = list.next;
                continue;
            }
            /* map sequence of <p class="Code"> to <pre>...</pre> */
            else if (attr != null && attr.value != null &&
                     attr.value.equals("Code"))
            {
                Node br = lexer.newLineNode();

                /*
                only this paragraph's content: normalizeSpaces also walks
                the following siblings of the node it is given
                */
                normalizeSpaces(lexer, node.content);

                if (list == null || list.tag != TagTable.tagPre)
                {
                    list = lexer.inferredTag("pre");
                    Node.insertNodeBeforeElement(node, list);
                }

                /* remove node and append to contents of list */
                Node.removeNode(node);
                Node.insertNodeAtEnd(list, node);
                /* replace the paragraph by its content, then end the line */
                stripSpan(lexer, node);
                Node.insertNodeAtEnd(list, br);

                /* resume with the sibling after the list, from the top */
                node = list.next;
                continue;
            }
            else
            {
                list = null;
            }
        }
        else
        {
            list = null;
        }

        /* strip out style and class attributes */
        if (node.type == Node.StartTag || node.type == Node.StartEndTag)
        {
            purgeAttributes(node);
        }

        if (node.content != null)
        {
            cleanWord2000(lexer, node.content);
        }

        node = node.next;
    }
}
/*
Tells whether the document looks like a Word 2000 export: true when
root.findHTML() yields a node and that node has an xmlns:o attribute.
*/
public static boolean isWord2000(Node root)
{
    Node html = root.findHTML();
    if (html == null)
    {
        return false;
    }
    return html.getAttrByName("xmlns:o") != null;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -