📄 htmlrewriter.java
字号:
// When true, ignoreLevel is raised on <HEAD> (convertURLS) and lowered on </HEAD>
// (handleEndTag); addToResult() writes nothing while ignoreLevel > 0.
private boolean removeHead = true;
// When true, <A> and <AREA> tags without a TARGET attribute get TARGET="_BLANK".
// NOTE(review): convertURLS reads this as cb.openInNewWindow - presumably the same
// field reached through a reference held elsewhere; confirm.
private boolean openInNewWindow = false;
// remove the onClick=, onBlur=, etc. - Attributes
private boolean removeOnSomething = true;
// Set by convertURLS when a SCRIPT tag is seen and removeScript is false; cleared by
// handleEndTag. handleComment keeps comments only while inScript or inStyle is set.
private boolean inScript = false;
// Same as inScript, for STYLE tags when removeStyle is false.
private boolean inStyle = false;
// In-memory buffer that collects the rewritten HTML; read back via getResult().
private StringWriter result = new StringWriter();
/**
 * Private no-argument constructor; it performs no initialization of its own.
 */
private Callback() {
    // intentionally empty
}
/**
 * Appends the string form of an object to the result buffer, unless output is
 * currently suppressed (ignoreLevel > 0).
 *
 * Kept as a separate method so the buffer could later be switched between a
 * StringBuffer and a StringWriter; it is not yet decided which suits better.
 *
 * @param txt object whose toString() value is appended
 * @return this Callback, so calls can be chained
 */
private Callback addToResult(Object txt)
{
    if (ignoreLevel <= 0) {
        try {
            String text = txt.toString();
            result.write(text);
        } catch (Exception e) {
            /* ignore */
        }
    }
    return this;
}
/**
 * Appends a character array to the result buffer, unless output is currently
 * suppressed (ignoreLevel > 0).
 *
 * @param txt characters to append
 * @return this Callback, so calls can be chained
 */
private Callback addToResult(char[] txt)
{
    if (ignoreLevel <= 0) {
        try {
            result.write(txt);
        } catch (Exception e) {
            /* ignore */
        }
    }
    return this;
}
/**
 * Returns everything written to the result buffer so far, prefixed with a
 * single space.
 *
 * @return Cleaned and rewritten HTML-Content
 */
public String getResult() {
    try {
        result.flush();
    } catch (Exception e) {
        /* ignore */
    }
    // WARNING: doesn't work, if you remove " " + ... but don't know why
    final String content = result.toString();
    return " " + content;
}
/**
 * Does nothing; output is collected in the in-memory result buffer and read
 * back through getResult().
 */
public void flush() throws javax.swing.text.BadLocationException {
    // intentionally empty - nothing to do here
}
/**
 * Handles a comment reported by the parser.
 *
 * Because scripts and styles are sometimes defined inside comments, comments
 * are written out while inside a kept SCRIPT or STYLE element (inScript /
 * inStyle). All other comments are removed.
 *
 * The comment is written through addToResult(), so it is also dropped while
 * output is suppressed (ignoreLevel > 0). Without that, a comment inside a
 * kept script that sits in a removed section (for example a removed HEAD)
 * would leak into the result.
 *
 * @param values the comment text, without the surrounding delimiters
 * @param param  not used
 */
public void handleComment(char[] values,int param) {
    if (!inStyle && !inScript) {
        // ordinary comment: we ignore it
        return;
    }
    addToResult("<!--").addToResult(values).addToResult("-->");
}
/**
 * Appends a single "\n" to the result (subject to the ignoreLevel check in
 * addToResult).
 *
 * @param str not used; a line feed is always written
 */
public void handleEndOfLineString(java.lang.String str) {
    final String lineFeed = "\n";
    addToResult(lineFeed);
}
/**
 * Parser errors are deliberately ignored.
 *
 * @param str   not used
 * @param param not used
 */
public void handleError(java.lang.String str,int param) {
    // intentionally empty - errors are ignored
}
/**
 * Handles a simple tag: META tags are dropped when removeMeta is set, every
 * other tag is passed on to appendTagToResult().
 *
 * @param tag   the tag reported by the parser
 * @param attrs the tag's attributes
 * @param param not used
 */
public void handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int param) {
    boolean dropMeta = removeMeta && (tag == HTML.Tag.META);
    if (!dropMeta) {
        appendTagToResult(tag, attrs);
    }
}
/**
 * Handles an opening tag by passing it, with its attributes, to
 * appendTagToResult().
 *
 * @param tag      the tag reported by the parser
 * @param attrs    the tag's attributes
 * @param position not used
 */
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int position) {
    appendTagToResult(tag, attrs);
}
/**
 * Handles a closing tag.
 *
 * The closing tag is written for every tag except a </FORM> that has no
 * matching open <FORM> (inForm is false). Afterwards the inScript / inStyle
 * flags and ignoreLevel are reset, mirroring what convertURLS() did for the
 * opening tag. The tag is written before ignoreLevel is lowered, so the
 * closing tag of a removed element is itself suppressed.
 *
 * @param tag      the tag being closed
 * @param position not used
 */
public void handleEndTag(HTML.Tag tag, int position) {
    final boolean isForm = (tag == HTML.Tag.FORM);
    final boolean isScript = (tag == HTML.Tag.SCRIPT);
    final boolean isStyle = (tag == HTML.Tag.STYLE);

    // form handling seems to be buggy: a </form> outside of any <FORM> is not
    // really needed, so it is skipped
    final boolean orphanFormEnd = isForm && !inForm;
    if (!orphanFormEnd) {
        addToResult("</").addToResult(tag).addToResult(">");
    }
    if (isForm) {
        inForm = false;
    }

    // leaving a script/style element that is kept in the output
    if (isScript && !removeScript) {
        inScript = false;
    } else if (isStyle && !removeStyle) {
        inStyle = false;
    }

    // leaving an element whose content is removed from the output
    final boolean closesIgnoredElement =
            (removeScript && isScript)
            || (removeStyle && isStyle)
            || (removeHead && (tag == HTML.Tag.HEAD))
            || (removeApplet && (tag == HTML.Tag.APPLET))
            || (removeObject && (tag == HTML.Tag.OBJECT))
            || (removeNoScript && (tag.toString().equalsIgnoreCase("NOSCRIPT")));
    if (closesIgnoredElement) {
        ignoreLevel --;
    }
}
/**
 * Writes an opening tag with its attributes to the result.
 *
 * The parser's synthetic __ENDOFLINETAG__ and __IMPLIED__ tags are skipped.
 * convertURLS() is applied first; it may rewrite the attributes and update
 * ignoreLevel. BASE tags are never written. Attributes whose name starts
 * with "on" are dropped when removeOnSomething is set.
 *
 * Attribute values are written inside double quotes, so any double quote in
 * a value is escaped as &quot;. Otherwise it would end the attribute early
 * and the remainder would be read as further markup.
 *
 * @param tag   the tag to write
 * @param attrs the tag's attributes; may be modified by convertURLS()
 */
private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs) {
    String tagName = tag.toString();
    if (tagName.equalsIgnoreCase("__ENDOFLINETAG__")) {
        // jdk 1.2.2 places a tag <__ENDOFLINETAG__> in the result ...
        // we don't want this one
        return;
    }
    if (tagName.equalsIgnoreCase("__IMPLIED__")) {
        // jdk 1.3 places a tag <__IMPLIED__> in the result ...
        // we don't want this one
        return;
    }
    convertURLS(tag,attrs);
    if (tag == HTML.Tag.BASE) {
        return;
    }
    addToResult("<").addToResult(tag);
    Enumeration e = attrs.getAttributeNames();
    while (e.hasMoreElements()) {
        Object attr = e.nextElement();
        String attrName = attr.toString();
        // include attribute only when Not(RemoveOnSomething = True and starts with "on")
        boolean isEventHandler = attrName.toLowerCase().startsWith("on")
                && (attrName.length() > 2);
        if (removeOnSomething && isEventHandler) {
            continue;
        }
        String value = attrs.getAttribute(attr).toString();
        addToResult(" ").addToResult(attr).addToResult("=\"")
                .addToResult(escapeAttributeValue(value)).addToResult("\"");
    }
    addToResult(">");
}

/**
 * Makes a value safe to place between double quotes by replacing every
 * double quote with &quot;. All other characters are passed through
 * unchanged.
 */
private String escapeAttributeValue(String value) {
    if (value.indexOf('"') < 0) {
        return value; // common case: nothing to escape
    }
    StringBuffer escaped = new StringBuffer(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        if (c == '"') {
            escaped.append("&quot;");
        } else {
            escaped.append(c);
        }
    }
    return escaped.toString();
}
/** Here the magic happens.
 *
 * Rewrites the URL-carrying attributes of a tag against baseUrl and then
 * updates the inScript / inStyle flags and ignoreLevel for tags whose content
 * is kept or removed.
 *
 * If someone wants new types of URLs to be rewritten, add them here
 * @param tag TAG from the Callback-Interface
 * @param attrs Attribute-Set from the Callback-Interface
 */
private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs ) {
    final boolean isScript = (tag == HTML.Tag.SCRIPT);
    final boolean isStyle = (tag == HTML.Tag.STYLE);

    // first we do an URL-rewrite on different tags
    // (addConvertedAttribute() does nothing when the attribute is absent)
    if ((tag == HTML.Tag.A) || (tag == HTML.Tag.AREA)) {
        // ---- CHECKING <A HREF and <AREA HREF
        addConvertedAttribute(HTML.Attribute.HREF, attrs);
        if ((attrs.getAttribute(HTML.Attribute.TARGET) == null) && cb.openInNewWindow) {
            attrs.addAttribute(HTML.Attribute.TARGET, "_BLANK");
        }
    } else if ((tag == HTML.Tag.IMG) || (tag == HTML.Tag.INPUT) || isScript) {
        // ---- CHECKING <IMG SRC & <INPUT SRC & <SCRIPT SRC
        addConvertedAttribute(HTML.Attribute.SRC, attrs);
    } else if (tag == HTML.Tag.LINK) {
        // ---- CHECKING <LINK HREF
        addConvertedAttribute(HTML.Attribute.HREF, attrs);
    } else if ((tag == HTML.Tag.APPLET) || (tag == HTML.Tag.OBJECT)) {
        // ---- CHECKING <APPLET CODEBASE= and <OBJECT CODEBASE=
        if (attrs.getAttribute(HTML.Attribute.CODEBASE) != null) {
            addConvertedAttribute(HTML.Attribute.CODEBASE, attrs);
        } else {
            // no CODEBASE given: use baseUrl up to and including its last "/"
            String base = baseUrl.toString();
            int endOfPath = base.lastIndexOf("/");
            attrs.addAttribute(HTML.Attribute.CODEBASE, base.substring(0, endOfPath + 1));
        }
    } else if (tag == HTML.Tag.BODY) {
        if (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null) {
            // background images are applied to the ENTIRE page, this remove them!
            attrs.removeAttribute(HTML.Attribute.BACKGROUND);
        }
    } else if (tag == HTML.Tag.BASE) {
        Object href = attrs.getAttribute(HTML.Attribute.HREF);
        if (href != null) {
            try {
                baseUrl = new URL(href.toString());
            } catch (Throwable t) {
                logger.error( "HTMLRewriter: Setting BASE="
                        + href.toString()
                        + t.getMessage());
            }
            attrs.removeAttribute(HTML.Attribute.HREF);
        }
    } else if (tag == HTML.Tag.FORM) {
        // ---- CHECKING <FORM ACTION=
        inForm = true; // buggy <form> handling in jdk 1.3
        if (attrs.getAttribute(HTML.Attribute.ACTION) != null) {
            addConvertedAttribute(HTML.Attribute.ACTION, attrs);
        } else {
            //self referencing <FORM>
            attrs.addAttribute(HTML.Attribute.ACTION, baseUrl.toString());
        }
    } else if (tag == HTML.Tag.TD) {
        // ---- CHECKING <TD BACKGROUND=
        addConvertedAttribute(HTML.Attribute.BACKGROUND, attrs);
    }

    // then we check for ignored tags ...
    // don't forget to add changes to handleEndTag() as well, else
    // things will get screwed up!
    if (isScript && !removeScript) {
        inScript = true;
    } else if (isStyle && !removeStyle) {
        inStyle = true;
    }

    final boolean opensIgnoredElement =
            (removeScript && isScript)
            || (removeStyle && isStyle)
            || (removeHead && (tag == HTML.Tag.HEAD))
            || (removeApplet && (tag == HTML.Tag.APPLET))
            || (removeObject && (tag == HTML.Tag.OBJECT))
            || (removeNoScript && (tag.toString().equalsIgnoreCase("NOSCRIPT")));
    if (opensIgnoredElement) {
        ignoreLevel ++;
    }
}
/**
 * Replaces the value of the given attribute with the result of
 * generateNewUrl() applied to its current value. Does nothing when the
 * attribute is not present.
 *
 * @param attr  attribute whose value is a URL
 * @param attrs attribute set to update in place
 */
private void addConvertedAttribute( HTML.Attribute attr,
                                    MutableAttributeSet attrs ) {
    Object current = attrs.getAttribute(attr);
    if (current == null) {
        return;
    }
    String converted = generateNewUrl(current.toString());
    attrs.addAttribute(attr, converted);
}
/**
 * Resolves a URL against baseUrl.
 *
 * When the URL cannot be resolved, the original string is returned unchanged.
 * The failure is logged, except for "javascript:" URLs, which are expected to
 * fail and are passed through silently.
 *
 * @param oldURL the URL as found in the document
 * @return the resolved URL, or oldURL itself if it could not be resolved
 */
private String generateNewUrl(String oldURL) {
    try {
        return new URL(baseUrl, oldURL).toString();
    } catch (Exception ex) {
        // regionMatches(ignoreCase) does not depend on the default locale,
        // unlike toLowerCase(), which maps "I" to a dotless i under Turkish
        // locales and would miss "JAVASCRIPT:".
        final String scheme = "javascript:";
        boolean isJavascript = (oldURL != null)
                && oldURL.regionMatches(true, 0, scheme, 0, scheme.length());
        if (!isJavascript) {
            logger.error( "HTMLRewriter: Setting BASE="
                    + baseUrl
                    + " Old = "
                    + oldURL
                    + " Error = "
                    + ex.getMessage());
        }
        return oldURL; // default behaviour ...
    }
}
/**
 * Appends text content to the result (subject to the ignoreLevel check in
 * addToResult).
 *
 * @param values the text characters reported by the parser
 * @param param  not used
 */
public void handleText(char[] values,int param) {
    final char[] text = values;
    addToResult(text);
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -