📄 clean.java
字号:
/*
* @(#)Clean.java 1.11 2000/08/16
*
*/
package org.w3c.tidy;
/**
*
* Clean up misuse of presentation markup
*
* (c) 1998-2000 (W3C) MIT, INRIA, Keio University
* See Tidy.java for the copyright notice.
* Derived from <a href="http://www.w3.org/People/Raggett/tidy">
* HTML Tidy Release 4 Aug 2000</a>
*
* @author Dave Raggett <dsr@w3.org>
* @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
* @version 1.0, 1999/05/22
* @version 1.0.1, 1999/05/29
* @version 1.1, 1999/06/18 Java Bean
* @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
* @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
* @version 1.4, 1999/09/04 DOM support
* @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
* @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
* @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
* @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
* @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
* @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
* @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
*/
/*
Filters from other formats such as Microsoft Word
often make excessive use of presentation markup such
as font tags, B, I, and the align attribute. By applying
a set of production rules, it is straightforward to
transform this to use CSS.
Some rules replace some of the children of an element by
style properties on the element, e.g.
<p><b>...</b></p> -> <p style="font-weight: bold">...</p>
Such rules are applied to the element's content and then
to the element itself until no more rules apply.
Having applied all the rules to an element, it will have
a style attribute with one or more properties.
Other rules strip the element they apply to, replacing
it by style properties on the contents, e.g.
<dir><li><p>...</li></dir> -> <p style="margin-left: 1em">...
These rules are applied to an element before processing
its content and replace the current element by the first
element in the exposed content.
After applying both sets of rules, you can replace the
style attribute by a class value and style rule in the
document head. To support this, an association of styles
and class names is built.
A naive approach is to rely on string matching to test
when two property lists are the same. A better approach
would be to first sort the properties before matching.
*/
public class Clean {
/* Counter for generated class names: gensymClass() returns "c" + classNum and then increments it. */
private int classNum = 1;
/*
Insert a name/value pair into a linked list of style properties.
The new entry is placed in front of the first existing entry whose
name compares greater (String.compareTo), so a list built only
through this method stays sorted by name.
If an entry with the same name is met first, nothing is inserted
and the existing value is kept.
Returns the head of the resulting list.
*/
private static StyleProp insertProperty(StyleProp props, String name,
                                        String value)
{
    StyleProp before = null;
    StyleProp cursor = props;

    /* walk forward until we hit the insertion point or fall off the end */
    while (cursor != null)
    {
        int order = cursor.name.compareTo(name);
        if (order == 0)
        {
            /* already defined: the earlier value wins */
            return props;
        }
        if (order > 0)
        {
            break;
        }
        before = cursor;
        cursor = cursor.next;
    }

    StyleProp added;
    if (cursor != null)
    {
        /* link the new entry in front of the larger-named one */
        added = new StyleProp(new String(name), new String(value), cursor);
    }
    else
    {
        /* reached the end (or the list was empty): new entry goes last */
        added = new StyleProp(new String(name), new String(value));
    }

    if (before == null)
    {
        /* nothing precedes the new entry, so it becomes the head */
        return added;
    }
    before.next = added;
    return props;
}
/*
Parse a style string of the form "name: value; name: value"
and add each pair to the property list via insertProperty().
prop is the list to extend (may be null); the head of the
resulting list is returned.
Only space characters before a name and before a value are
skipped; everything else between the delimiters is kept as is.
Parsing stops at the first segment that has no ':'.
*/
private static StyleProp createProps(StyleProp prop, String style)
{
    final int length = style.length();
    int pos = 0;

    while (pos < length)
    {
        /* skip spaces in front of the property name */
        while (pos < length && style.charAt(pos) == ' ')
        {
            ++pos;
        }

        /* the name runs up to the next ':'; without one there is nothing left to parse */
        int colon = style.indexOf(':', pos);
        if (colon < 0)
        {
            break;
        }

        /* skip spaces in front of the value */
        int valueStart = colon + 1;
        while (valueStart < length && style.charAt(valueStart) == ' ')
        {
            ++valueStart;
        }

        /* the value runs up to the next ';' or to the end of the string */
        int semicolon = style.indexOf(';', valueStart);
        int valueEnd = (semicolon < 0) ? length : semicolon;

        prop = insertProperty(prop,
                              style.substring(pos, colon),
                              style.substring(valueStart, valueEnd));

        if (semicolon < 0)
        {
            break;
        }
        pos = semicolon + 1;
    }
    return prop;
}
/*
Serialise a property list as "name: value; name: value".
Entries are written in list order, separated by "; ", with no
trailing separator. An empty (null) list yields "".
*/
private static String createPropString(StyleProp props)
{
    int len = 0;
    StyleProp prop;

    /*
    Upper bound on the output length (each entry plus ": " and "; ").
    It is used to size the buffer once, instead of re-copying the
    whole string on every concatenation.
    */
    for (prop = props; prop != null; prop = prop.next)
    {
        len += prop.name.length() + 2;
        len += prop.value.length() + 2;
    }

    StringBuffer style = new StringBuffer(len);
    for (prop = props; prop != null; prop = prop.next)
    {
        style.append(prop.name);
        style.append(": ");
        style.append(prop.value);
        /* separator only between entries, not after the last one */
        if (prop.next != null)
        {
            style.append("; ");
        }
    }
    return style.toString();
}
/*
Create a style string with the properties of both arguments merged.
style is parsed first and property second; because insertProperty()
ignores a name that is already present, a value from style wins
over a same-named value from property.
*/
private static String addProperty(String style, String property)
{
    StyleProp merged = createProps(createProps(null, style), property);
    return createPropString(merged);
}
/*
Generate the next class name: "c" followed by the current value
of classNum, which is then incremented.
The tag argument is not used.
*/
private String gensymClass(String tag)
{
    return "c" + classNum++;
}
/*
Look up the class name associated with a tag / property-string pair
in lexer.styles. Matching is by exact string equality on both.
If no entry matches, a new Style with a freshly generated class name
is created (with the current list passed to its constructor), stored
as the new lexer.styles, and its class name is returned.
*/
private String findStyle(Lexer lexer, String tag, String properties)
{
    Style entry = lexer.styles;
    while (entry != null)
    {
        boolean sameTag = entry.tag.equals(tag);
        if (sameTag && entry.properties.equals(properties))
        {
            return entry.tagClass;
        }
        entry = entry.next;
    }

    /* not in the dictionary yet: register it under a new class name */
    Style added = new Style(new String(tag), gensymClass(tag),
                            new String(properties), lexer.styles);
    lexer.styles = added;
    return added.tagClass;
}
/*
Find the style attribute in node and replace it by the
corresponding class attribute. The class name comes from
findStyle(), which searches the style dictionary and
generates a new class when there is no match.
If the node already has a class attribute, the class name is
appended to it after a space and the style attribute is removed;
otherwise the style attribute itself is turned into the class
attribute. Nodes without a style attribute are left alone.
*/
private void style2Rule(Lexer lexer, Node node)
{
    AttVal styleAttr = node.getAttrByName("style");
    if (styleAttr == null)
    {
        return;
    }

    String className = findStyle(lexer, node.element, styleAttr.value);
    AttVal classAttr = node.getAttrByName("class");

    if (classAttr == null)
    {
        /* reuse the style attribute object as the class attribute */
        styleAttr.attribute = "class";
        styleAttr.value = className;
        return;
    }

    /* existing class attribute: append the new class name */
    classAttr.value = classAttr.value + " " + className;
    node.removeAttribute(styleAttr);
}
/*
Emit "<selector> { color: <color> }" followed by a newline through
lexer.addStringLiteral(). Does nothing when color is null.
*/
private static void addColorRule(Lexer lexer, String selector, String color)
{
    if (color == null)
    {
        return;
    }

    /* same four pieces, in the same order, one call per piece */
    String[] pieces = { selector, " { color: ", color, " }\n" };
    for (int i = 0; i < pieces.length; ++i)
    {
        lexer.addStringLiteral(pieces[i]);
    }
}
/*
Move presentation attributes from body to style rules:
background="foo" -> body { background-image: url(foo) }
bgcolor="foo" -> body { background-color: foo }
text="foo" -> body { color: foo }
link="foo" -> :link { color: foo }
vlink="foo" -> :visited { color: foo }
alink="foo" -> :active { color: foo }
Each attribute that is found is removed from body; the rule text
is written through lexer.addStringLiteral().
*/
private static void cleanBodyAttrs(Lexer lexer, Node body)
{
    String imageUrl = null;
    String backColor = null;
    String textColor = null;
    AttVal av;

    /* collect the three attributes that end up in the single "body" rule */
    av = body.getAttrByName("background");
    if (av != null)
    {
        imageUrl = av.value;
        av.value = null;
        body.removeAttribute(av);
    }

    av = body.getAttrByName("bgcolor");
    if (av != null)
    {
        backColor = av.value;
        av.value = null;
        body.removeAttribute(av);
    }

    av = body.getAttrByName("text");
    if (av != null)
    {
        textColor = av.value;
        av.value = null;
        body.removeAttribute(av);
    }

    boolean nothingForBody =
        imageUrl == null && backColor == null && textColor == null;
    if (!nothingForBody)
    {
        lexer.addStringLiteral(" body {\n");
        if (imageUrl != null)
        {
            lexer.addStringLiteral(" background-image: url(");
            lexer.addStringLiteral(imageUrl);
            lexer.addStringLiteral(");\n");
        }
        if (backColor != null)
        {
            lexer.addStringLiteral(" background-color: ");
            lexer.addStringLiteral(backColor);
            lexer.addStringLiteral(";\n");
        }
        if (textColor != null)
        {
            lexer.addStringLiteral(" color: ");
            lexer.addStringLiteral(textColor);
            lexer.addStringLiteral(";\n");
        }
        lexer.addStringLiteral(" }\n");
    }

    /* link colours each become their own pseudo-class rule, in this order */
    String[] linkAttrs = { "link", "vlink", "alink" };
    String[] linkSelectors = { " :link", " :visited", " :active" };
    for (int i = 0; i < linkAttrs.length; ++i)
    {
        av = body.getAttrByName(linkAttrs[i]);
        if (av == null)
        {
            continue;
        }
        addColorRule(lexer, linkSelectors[i], av.value);
        body.removeAttribute(av);
    }
}
/*
Report whether the document body is free of the presentation
attributes background, bgcolor, text, link, vlink and alink.
Returns true when no body is found or none of them is present.
Otherwise sets Report.USING_BODY in lexer.badLayout and returns false.
*/
private static boolean niceBody(Lexer lexer, Node doc)
{
    Node body = Node.findBody(doc);
    if (body == null)
    {
        return true;
    }

    String[] presentational =
        { "background", "bgcolor", "text", "link", "vlink", "alink" };
    for (int i = 0; i < presentational.length; ++i)
    {
        if (body.getAttrByName(presentational[i]) != null)
        {
            lexer.badLayout |= Report.USING_BODY;
            return false;
        }
    }
    return true;
}
/* create style element using rules from dictionary */
private static void createStyleElement(Lexer lexer, Node doc)
{
Node node, head, body;
Style style;
AttVal av;
if (lexer.styles == null && niceBody(lexer, doc))
return;
node = lexer.newNode(Node.StartTag, null, 0, 0, new String("style"));
node.implicit = true;
/* insert type attribute */
av = new AttVal(null, null, '"',
new String("type"),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -