htmlcleaner.java
来自「html过滤 html过滤 html过滤」· Java 代码 · 共 902 行 · 第 1/2 页
JAVA
902 行
/**
 * Queues a token on the start tag selected by {@code _openTags.findTagToPlaceRubbish()}
 * so that it can be relocated later (see {@code closeSnippet}, which processes the
 * queued items).
 *
 * Nothing happens when the last open tag's info reports {@code isIgnorePermitted()},
 * or when no tag is found to receive the token.
 *
 * @param nodeList   list in which the receiving start tag is looked up by position
 * @param tokenToAdd token to queue on that start tag
 */
private void saveToLastOpenTag(List nodeList, Object tokenToAdd) {
    final TagPos lastOpen = _openTags.getLastTagPos();
    final boolean ignorePermitted =
            lastOpen != null && lastOpen.info != null && lastOpen.info.isIgnorePermitted();

    if (!ignorePermitted) {
        final TagPos target = _openTags.findTagToPlaceRubbish();
        if (target == null) {
            return;
        }
        final TagNode receiver = (TagNode) nodeList.get(target.position);
        receiver.addItemForMoving(tokenToAdd);
    }
}
/**
 * Tells whether the given object is a start tag token, i.e. a {@code TagNode}
 * whose {@code isFormed()} is still false.
 *
 * @param o object to inspect; may be null
 * @return true only for a not-yet-formed {@code TagNode}
 */
private boolean isStartToken(Object o) {
    if (!(o instanceof TagNode)) {
        return false;
    }
    final TagNode candidate = (TagNode) o;
    return !candidate.isFormed();
}
/**
 * Processes the flat token list in place, deciding for every token whether it is
 * dropped, kept, queued for moving, or recorded as an open tag.
 *
 * Slots of consumed or rejected tokens are overwritten with {@code null} through
 * {@code nodeIterator.set(null)} instead of being removed (presumably so that the list
 * positions recorded in {@code _openTags} stay usable). Several branches step the
 * iterator backwards so that the current token, or a newly inserted one, is returned
 * again by the next {@code next()} call.
 *
 * @param nodeList     list being processed; handed to {@code closeSnippet} and
 *                     {@code saveToLastOpenTag}, which index into it
 * @param nodeIterator iterator supplying the tokens; presumably an iterator over
 *                     {@code nodeList} (the recursive call in {@code closeSnippet} passes
 *                     {@code list.listIterator(0)}) - confirm for other callers
 */
void makeTree(List nodeList, ListIterator nodeIterator) {
// process until the end of the list is reached
while ( nodeIterator.hasNext() ) {
BaseToken token = (BaseToken) nodeIterator.next();
if (token instanceof EndTagToken) {
EndTagToken endTagToken = (EndTagToken) token;
String tagName = endTagToken.getName();
TagInfo tag = tagInfoProvider.getTagInfo(tagName);
// end tag of an unknown or deprecated tag that is configured to be omitted: drop it
if ( (tag == null && omitUnknownTags) || (tag != null && tag.isDeprecated() && omitDeprecatedTags) ) {
nodeIterator.set(null);
// end tag of a known tag that does not allow a body: drop it
} else if ( tag != null && !tag.allowsBody() ) {
nodeIterator.set(null);
} else {
// close everything from the matching open tag up to this end tag, if there is one
TagPos matchingPosition = _openTags.findTag(tagName);
if (matchingPosition != null) {
closeSnippet(nodeList, matchingPosition, endTagToken);
} else if ( !isAllowedInLastOpenTag(token) ) {
saveToLastOpenTag(nodeList, token);
}
// the end tag token itself never stays in the list
nodeIterator.set(null);
}
} else if ( isStartToken(token) ) {
TagNode startTagToken = (TagNode) token;
String tagName = startTagToken.getName();
TagInfo tag = tagInfoProvider.getTagInfo(tagName);
// add tag to set of all tags (done before any filtering below)
allTags.add(tagName);
// HTML open tag: only its attributes are merged into the existing html node
if ( "html".equals(tagName) ) {
addAttributesToTag(htmlNode, startTagToken.getAttributes());
nodeIterator.set(null);
// BODY open tag: only its attributes are merged into the existing body node
} else if ( "body".equals(tagName) ) {
addAttributesToTag(bodyNode, startTagToken.getAttributes());
nodeIterator.set(null);
// HEAD open tag: only its attributes are merged into the existing head node
} else if ( "head".equals(tagName) ) {
addAttributesToTag(headNode, startTagToken.getAttributes());
nodeIterator.set(null);
// unknown or deprecated HTML tag, and such tags are configured to be omitted
} else if ( (tag == null && omitUnknownTags) || (tag != null && tag.isDeprecated() && omitDeprecatedTags) ) {
nodeIterator.set(null);
// tag declares permitted tags and _openTags.someAlreadyOpen(...) reports true for them: drop it
} else if ( tag != null && tag.hasPermittedTags() && _openTags.someAlreadyOpen(tag.getPermittedTags()) ) {
nodeIterator.set(null);
// if tag must be unique and was already encountered, ignore this occurrence
} else if ( tag != null && tag.isUnique() && _openTags.tagEncountered(tagName) ) {
nodeIterator.set(null);
// if the required outer tag is missing, this open tag is ignored
} else if ( !isFatalTagSatisfied(tag) ) {
nodeIterator.set(null);
// if there is no required parent tag - it must be added before this open tag
} else if ( mustAddRequiredParent(tag) ) {
String requiredParent = tag.getRequiredParent();
TagNode requiredParentStartToken = new TagNode(requiredParent);
// step back over the current token, insert the parent in front of it, then step back
// over the inserted parent so that it is the next token to be processed
nodeIterator.previous();
nodeIterator.add(requiredParentStartToken);
nodeIterator.previous();
// if last open tag has lower precedence than this one, it must be closed
} else if ( tag != null && !_openTags.isEmpty() && tag.isMustCloseTag( tagInfoProvider.getTagInfo(_openTags.getLastTagPos().name)) ) {
List closed = closeSnippet(nodeList, _openTags.getLastTagPos(), startTagToken);
int closedCount = closed.size();
// some of the closed tags may need to be reopened (copied) for the current tag
if ( tag.hasCopyTags() && closedCount > 0 ) {
// first iterates over list from the back and collects all start tokens
// in sequence that must be copied; stops at the first one that is not a copy tag
ListIterator closedIt = closed.listIterator(closedCount);
List toBeCopied = new ArrayList();
while (closedIt.hasPrevious()) {
TagNode currStartToken = (TagNode) closedIt.previous();
if ( tag.isCopy(currStartToken.getName()) ) {
toBeCopied.add(0, currStartToken);
} else {
break;
}
}
if (toBeCopied.size() > 0) {
// add() inserts at the cursor, which sits just after the current start token
Iterator copyIt = toBeCopied.iterator();
while (copyIt.hasNext()) {
TagNode currStartToken = (TagNode) copyIt.next();
nodeIterator.add( currStartToken.makeCopy() );
}
// back to the previous place, before adding new start tokens
for (int i = 0; i < toBeCopied.size(); i++) {
nodeIterator.previous();
}
}
}
// step back once more so the current start token is processed again
nodeIterator.previous();
// if this open tag is not allowed inside last open tag, then it must be moved to the place where it can be
} else if ( !isAllowedInLastOpenTag(token) ) {
saveToLastOpenTag(nodeList, token);
nodeIterator.set(null);
// if it is a known HTML tag but doesn't allow body, it is immediately closed
} else if ( tag != null && !tag.allowsBody() ) {
TagNode newTagNode = createTagNode(startTagToken);
if ( tag.isHeadTag() ) {
headNode.addChild(newTagNode);
nodeIterator.set(null);
} else {
nodeIterator.set(newTagNode);
}
// default case - just remember this open tag (by the index of the token just returned) and go further
} else {
_openTags.addTag( tagName, nodeIterator.previousIndex() );
}
} else {
// any other token: queue it for moving if it is not allowed inside the last open tag
if ( !isAllowedInLastOpenTag(token) ) {
saveToLastOpenTag(nodeList, token);
nodeIterator.set(null);
}
}
}
}
/**
 * Distributes the non-null entries of the given list between {@code headNode}
 * and {@code bodyNode}.
 *
 * A {@code TagNode} goes to the head when its tag info reports {@code isHeadTag()},
 * or reports {@code isHeadAndBodyTag()} while the body has no children yet; every other
 * entry goes to the body. A {@code ContentToken} whose string form is empty is skipped.
 *
 * @param listNodes processed node list; null entries are ignored
 */
private void createDocumentNodes(List listNodes) {
    for (Iterator nodes = listNodes.iterator(); nodes.hasNext(); ) {
        final Object node = nodes.next();
        if (node == null) {
            continue;
        }

        if (node instanceof TagNode) {
            final TagInfo info = tagInfoProvider.getTagInfo( ((TagNode) node).getName() );
            final boolean belongsToHead = info != null
                    && ( info.isHeadTag()
                         || (info.isHeadAndBodyTag() && bodyNode.getChildren().isEmpty()) );
            TagNode target = bodyNode;
            if (belongsToHead) {
                target = headNode;
            }
            target.addChild(node);
            continue;
        }

        // empty text content is not worth a child node
        if (node instanceof ContentToken && "".equals( ((ContentToken) node).toString() )) {
            continue;
        }

        bodyNode.addChild(node);
    }
}
/**
 * Closes the run of tokens that starts at {@code tagPos.position}, turning start tokens
 * into tag nodes and nesting the following items into them.
 *
 * Walking forward from {@code tagPos.position}:
 * - every start token (see {@code isStartToken}) is recorded in the returned list, any
 *   items queued on it for moving are processed with a temporary, fresh {@code OpenTags}
 *   instance, and a new node is built for it with {@code createTagNode};
 * - a node whose tag info reports {@code isHeadTag()} is attached to {@code headNode};
 *   otherwise it becomes a child of the node created just before it, or, for the first
 *   one, replaces its own slot in the list (preceded by its moved items, if any);
 * - any other non-null item is appended to the most recently created node.
 * Slots whose content was attached elsewhere are set to {@code null}.
 *
 * The walk stops when {@code toNode} is reached (identity comparison) or when the list
 * is exhausted. The original condition ignored the end-of-list flag whenever
 * {@code toNode} was non-null, so a {@code toNode} missing from the list made the loop
 * spin forever on the last item; the end of the list now always terminates the walk.
 *
 * @param nodeList list of tokens/nodes being restructured in place
 * @param tagPos   position of the first start token to close
 * @param toNode   item at which to stop (exclusive), or null to run to the end of the list
 * @return start tokens that were closed, in list order
 */
private List closeSnippet(List nodeList, TagPos tagPos, Object toNode) {
    List closed = new ArrayList();
    ListIterator it = nodeList.listIterator(tagPos.position);

    // most recently created node; following items are nested into it
    TagNode tagNode = null;
    Object item = it.next();
    boolean isListEnd = false;

    while ( !isListEnd && (toNode == null || item != toNode) ) {
        if ( isStartToken(item) ) {
            TagNode startTagToken = (TagNode) item;
            closed.add(startTagToken);
            List itemsToMove = startTagToken.getItemsToMove();
            if (itemsToMove != null) {
                // build the queued items into a tree of their own, isolated from the
                // currently open tags, then restore the open-tag state
                OpenTags prevOpenTags = _openTags;
                _openTags = new OpenTags();
                makeTree(itemsToMove, itemsToMove.listIterator(0));
                closeAll(itemsToMove);
                startTagToken.setItemsToMove(null);
                _openTags = prevOpenTags;
            }

            TagNode newTagNode = createTagNode(startTagToken);
            TagInfo tag = tagInfoProvider.getTagInfo( newTagNode.getName() );
            if ( tag != null && tag.isHeadTag() ) {
                headNode.addChild(newTagNode);
                it.set(null);
            } else if (tagNode != null) {
                tagNode.addChildren(itemsToMove);
                tagNode.addChild(newTagNode);
                it.set(null);
            } else {
                if (itemsToMove != null) {
                    // the slot then holds a list: moved items followed by the new node
                    itemsToMove.add(newTagNode);
                    it.set(itemsToMove);
                } else {
                    it.set(newTagNode);
                }
            }

            _openTags.removeTag( newTagNode.getName() );
            tagNode = newTagNode;
        } else {
            if (tagNode != null) {
                it.set(null);
                if (item != null) {
                    tagNode.addChild(item);
                }
            }
        }

        if ( it.hasNext() ) {
            item = it.next();
        } else {
            isListEnd = true;
        }
    }

    return closed;
}
/**
 * Closes every tag that is still open, starting from the first open tag position
 * reported by {@code _openTags}. Does nothing when no tag is open.
 *
 * @param nodeList list of tokens/nodes in which the open tags are closed
 */
private void closeAll(List nodeList) {
    final TagPos firstOpen = _openTags.findFirstTagPos();
    if (firstOpen == null) {
        return;
    }
    // a null end marker makes closeSnippet run to the end of the list
    closeSnippet(nodeList, firstOpen, null);
}
// setters and getters
/**
 * @return true if tokens of tags for which no tag info is available are dropped
 *         while the tree is built (see {@code makeTree})
 */
public boolean isOmitUnknownTags() {
    return this.omitUnknownTags;
}
/**
 * @param omitUnknownTags true to drop tokens of tags for which no tag info is
 *                        available while the tree is built (see {@code makeTree})
 */
public void setOmitUnknownTags(boolean omitUnknownTags) {
this.omitUnknownTags = omitUnknownTags;
}
/**
 * @return true if tokens of tags whose info reports {@code isDeprecated()} are
 *         dropped while the tree is built (see {@code makeTree})
 */
public boolean isOmitDeprecatedTags() {
    return this.omitDeprecatedTags;
}
/**
 * @param omitDeprecatedTags true to drop tokens of tags whose info reports
 *                           {@code isDeprecated()} while the tree is built (see {@code makeTree})
 */
public void setOmitDeprecatedTags(boolean omitDeprecatedTags) {
this.omitDeprecatedTags = omitDeprecatedTags;
}
/**
 * Returns the {@code advancedXmlEscape} setting.
 * NOTE(review): the code consuming this flag is not in this part of the file;
 * presumably the XML serializers read it - confirm.
 *
 * @return current value of the flag
 */
public boolean isAdvancedXmlEscape() {
    return this.advancedXmlEscape;
}
/**
 * Stores the {@code advancedXmlEscape} setting.
 * NOTE(review): the code consuming this flag is not in this part of the file;
 * presumably the XML serializers read it - confirm.
 *
 * @param advancedXmlEscape new value of the flag
 */
public void setAdvancedXmlEscape(boolean advancedXmlEscape) {
this.advancedXmlEscape = advancedXmlEscape;
}
/**
 * Returns the {@code useCdataForScriptAndStyle} setting.
 * NOTE(review): judging by the name it controls CDATA wrapping of script/style
 * content in the output; the consuming code is not visible here - confirm.
 *
 * @return current value of the flag
 */
public boolean isUseCdataForScriptAndStyle() {
    return this.useCdataForScriptAndStyle;
}
/**
 * Stores the {@code useCdataForScriptAndStyle} setting.
 * NOTE(review): judging by the name it controls CDATA wrapping of script/style
 * content in the output; the consuming code is not visible here - confirm.
 *
 * @param useCdataForScriptAndStyle new value of the flag
 */
public void setUseCdataForScriptAndStyle(boolean useCdataForScriptAndStyle) {
this.useCdataForScriptAndStyle = useCdataForScriptAndStyle;
}
/**
 * Returns the {@code translateSpecialEntities} setting.
 * NOTE(review): the code consuming this flag is not in this part of the file - confirm
 * its exact effect there.
 *
 * @return current value of the flag
 */
public boolean isTranslateSpecialEntities() {
    return this.translateSpecialEntities;
}
/**
 * Stores the {@code translateSpecialEntities} setting.
 * NOTE(review): the code consuming this flag is not in this part of the file - confirm
 * its exact effect there.
 *
 * @param translateSpecialEntities new value of the flag
 */
public void setTranslateSpecialEntities(boolean translateSpecialEntities) {
this.translateSpecialEntities = translateSpecialEntities;
}
/**
 * Returns the {@code recognizeUnicodeChars} setting.
 * NOTE(review): the code consuming this flag is not in this part of the file - confirm
 * its exact effect there.
 *
 * @return current value of the flag
 */
public boolean isRecognizeUnicodeChars() {
    return this.recognizeUnicodeChars;
}
/**
 * Stores the {@code recognizeUnicodeChars} setting.
 * NOTE(review): the code consuming this flag is not in this part of the file - confirm
 * its exact effect there.
 *
 * @param recognizeUnicodeChars new value of the flag
 */
public void setRecognizeUnicodeChars(boolean recognizeUnicodeChars) {
this.recognizeUnicodeChars = recognizeUnicodeChars;
}
/**
 * Returns the {@code omitComments} setting.
 * NOTE(review): presumably comments are left out of the result when true; the
 * consuming code is not visible here - confirm.
 *
 * @return current value of the flag
 */
public boolean isOmitComments() {
    return this.omitComments;
}
/**
 * Stores the {@code omitComments} setting.
 * NOTE(review): presumably comments are left out of the result when true; the
 * consuming code is not visible here - confirm.
 *
 * @param omitComments new value of the flag
 */
public void setOmitComments(boolean omitComments) {
this.omitComments = omitComments;
}
/**
 * Returns the {@code omitXmlDeclaration} setting.
 * NOTE(review): presumably the serializers skip the XML declaration when true; the
 * consuming code is not visible here - confirm.
 *
 * @return current value of the flag
 */
public boolean isOmitXmlDeclaration() {
    return this.omitXmlDeclaration;
}
/**
 * Stores the {@code omitXmlDeclaration} setting.
 * NOTE(review): presumably the serializers skip the XML declaration when true; the
 * consuming code is not visible here - confirm.
 *
 * @param omitXmlDeclaration new value of the flag
 */
public void setOmitXmlDeclaration(boolean omitXmlDeclaration) {
this.omitXmlDeclaration = omitXmlDeclaration;
}
/**
 * Returns the {@code omitDoctypeDeclaration} setting.
 * NOTE(review): presumably the serializers skip the DOCTYPE declaration when true; the
 * consuming code is not visible here - confirm.
 *
 * @return current value of the flag
 */
public boolean isOmitDoctypeDeclaration() {
    return this.omitDoctypeDeclaration;
}
/**
 * Stores the {@code omitDoctypeDeclaration} setting.
 * NOTE(review): presumably the serializers skip the DOCTYPE declaration when true; the
 * consuming code is not visible here - confirm.
 *
 * @param omitDoctypeDeclaration new value of the flag
 */
public void setOmitDoctypeDeclaration(boolean omitDoctypeDeclaration) {
this.omitDoctypeDeclaration = omitDoctypeDeclaration;
}
/**
 * Returns the {@code omitXmlnsAttributes} setting.
 * NOTE(review): presumably xmlns attributes are left out of the result when true; the
 * consuming code is not visible here - confirm.
 *
 * @return current value of the flag
 */
public boolean isOmitXmlnsAttributes() {
    return this.omitXmlnsAttributes;
}
/**
 * Stores the {@code omitXmlnsAttributes} setting.
 * NOTE(review): presumably xmlns attributes are left out of the result when true; the
 * consuming code is not visible here - confirm.
 *
 * @param omitXmlnsAttributes new value of the flag
 */
public void setOmitXmlnsAttributes(boolean omitXmlnsAttributes) {
this.omitXmlnsAttributes = omitXmlnsAttributes;
}
/**
 * Returns the {@code hyphenReplacementInComment} setting.
 * NOTE(review): presumably the text substituted for hyphen sequences inside comments;
 * the consuming code is not visible here - confirm.
 *
 * @return current replacement string, exactly as stored (may be null if never set)
 */
public String getHyphenReplacementInComment() {
    return this.hyphenReplacementInComment;
}
/**
 * Stores the {@code hyphenReplacementInComment} setting; the value is not validated.
 * NOTE(review): presumably the text substituted for hyphen sequences inside comments;
 * the consuming code is not visible here - confirm.
 *
 * @param hyphenReplacementInComment new replacement string
 */
public void setHyphenReplacementInComment(String hyphenReplacementInComment) {
this.hyphenReplacementInComment = hyphenReplacementInComment;
}
/**
 * Returns the set to which {@code makeTree} adds the name of every start tag it
 * encounters (names are added before any filtering, so dropped tags are included).
 * The internal set itself is returned, not a copy.
 *
 * @return set of encountered tag names
 */
public Set getAllTags() {
    return this.allTags;
}
// methods for writing result
/**
 * The most general way to serialize resulting XML: hands the root
 * {@code htmlNode} to a caller-supplied serializer.
 *
 * @param xmlSerializer serializer that produces the output; must not be null
 * @throws IOException if the serializer fails to write
 */
public void writeXml(XmlSerializer xmlSerializer) throws IOException {
xmlSerializer.createXml(htmlNode);
}
/**
 * Serializes the root {@code htmlNode} to the writer with the serializer that
 * corresponds to the requested write method.
 *
 * @param writer destination of the XML
 * @param method {@code WRITE_METHOD_COMPACT} or {@code WRITE_METHOD_PRETTY}; any other
 *               value selects {@code SimpleXmlSerializer}
 * @throws IOException if the serializer fails to write
 */
private void writeXml(Writer writer, int method) throws IOException {
    final XmlSerializer serializer;
    if (method == WRITE_METHOD_COMPACT) {
        serializer = new CompactXmlSerializer(writer, this);
    } else if (method == WRITE_METHOD_PRETTY) {
        serializer = new PrettyXmlSerializer(writer, this);
    } else {
        // simple output is the fallback for every other method value
        serializer = new SimpleXmlSerializer(writer, this);
    }

    serializer.createXml(htmlNode);
}
/**
 * Serializes the document to a stream, encoding characters with the named charset.
 * NOTE(review): the buffered writer is neither flushed nor closed here; presumably the
 * serializer takes care of that - confirm.
 *
 * @param out     stream receiving the XML
 * @param charset name of the charset used for encoding
 * @param method  write method constant, forwarded to {@code writeXml}
 * @throws IOException if the charset is unsupported or writing fails
 */
private void writeToStream(OutputStream out, String charset, int method) throws IOException {
    final Writer encoder = new OutputStreamWriter(out, charset);
    writeXml(new BufferedWriter(encoder), method);
}
/**
 * Serializes the document to a stream, encoding characters with the platform
 * default charset.
 * NOTE(review): the buffered writer is neither flushed nor closed here; presumably the
 * serializer takes care of that - confirm.
 *
 * @param out    stream receiving the XML
 * @param method write method constant, forwarded to {@code writeXml}
 * @throws IOException if writing fails
 */
private void writeToStream(OutputStream out, int method) throws IOException {
    final Writer encoder = new OutputStreamWriter(out);
    writeXml(new BufferedWriter(encoder), method);
}
/**
 * Writes the document to the stream with {@code WRITE_METHOD_SIMPLE}, using the
 * platform default charset.
 *
 * @param out stream receiving the XML
 * @throws IOException if writing fails
 */
public void writeXmlToStream(OutputStream out) throws IOException {
writeToStream(out, WRITE_METHOD_SIMPLE);
}
/**
 * Writes the document to the stream with {@code WRITE_METHOD_SIMPLE}, using the
 * named charset.
 *
 * @param out     stream receiving the XML
 * @param charset name of the charset used for encoding
 * @throws IOException if the charset is unsupported or writing fails
 */
public void writeXmlToStream(OutputStream out, String charset) throws IOException {
writeToStream(out, charset, WRITE_METHOD_SIMPLE);
}
/**
 * Writes the document to the stream with {@code WRITE_METHOD_COMPACT}, using the
 * platform default charset.
 *
 * @param out stream receiving the XML
 * @throws IOException if writing fails
 */
public void writeCompactXmlToStream(OutputStream out) throws IOException {
writeToStream(out, WRITE_METHOD_COMPACT);
}
/**
 * Writes the document to the stream with {@code WRITE_METHOD_COMPACT}, using the
 * named charset.
 *
 * @param out     stream receiving the XML
 * @param charset name of the charset used for encoding
 * @throws IOException if the charset is unsupported or writing fails
 */
public void writeCompactXmlToStream(OutputStream out, String charset) throws IOException {
writeToStream(out, charset, WRITE_METHOD_COMPACT);
}
/**
 * Writes the document to the stream with {@code WRITE_METHOD_PRETTY}, using the
 * platform default charset.
 *
 * @param out stream receiving the XML
 * @throws IOException if writing fails
 */
public void writePrettyXmlToStream(OutputStream out) throws IOException {
writeToStream(out, WRITE_METHOD_PRETTY);
}
/**
 * Writes the document to the stream with {@code WRITE_METHOD_PRETTY}, using the
 * named charset.
 *
 * @param out     stream receiving the XML
 * @param charset name of the charset used for encoding
 * @throws IOException if the charset is unsupported or writing fails
 */
public void writePrettyXmlToStream(OutputStream out, String charset) throws IOException {
writeToStream(out, charset, WRITE_METHOD_PRETTY);
}
/**
 * Writes the document to a file, encoding characters with the named charset.
 *
 * The file stream is opened here, so it is also closed here: previously it was never
 * closed by this class, which leaked the file handle whenever writing failed (for
 * example on an unsupported charset name). Closing a stream that has already been
 * closed further down the call chain has no effect.
 *
 * @param fileName path of the file to create or overwrite
 * @param charset  name of the charset used for encoding
 * @param method   write method constant, forwarded to {@code writeToStream}
 * @throws IOException if the file cannot be opened, the charset is unsupported,
 *                     or writing fails
 */
private void writeToFile(String fileName, String charset, int method) throws IOException {
    OutputStream out = new FileOutputStream(fileName);
    try {
        writeToStream(out, charset, method);
    } finally {
        out.close();
    }
}
/**
 * Writes the document to a file, encoding characters with the platform default charset.
 *
 * The file stream is opened here, so it is also closed here: previously it was never
 * closed by this class, which leaked the file handle whenever writing failed. Closing
 * a stream that has already been closed further down the call chain has no effect.
 *
 * @param fileName path of the file to create or overwrite
 * @param method   write method constant, forwarded to {@code writeToStream}
 * @throws IOException if the file cannot be opened or writing fails
 */
private void writeToFile(String fileName, int method) throws IOException {
    OutputStream out = new FileOutputStream(fileName);
    try {
        writeToStream(out, method);
    } finally {
        out.close();
    }
}
/**
 * Writes the document to the named file with {@code WRITE_METHOD_SIMPLE}, using the
 * platform default charset.
 *
 * @param fileName path of the file to write
 * @throws IOException if the file cannot be opened or writing fails
 */
public void writeXmlToFile(String fileName) throws IOException {
writeToFile(fileName, WRITE_METHOD_SIMPLE);
}
/**
 * Writes the document to the named file with {@code WRITE_METHOD_SIMPLE}, using the
 * named charset.
 *
 * @param fileName path of the file to write
 * @param charset  name of the charset used for encoding
 * @throws IOException if the file cannot be opened, the charset is unsupported,
 *                     or writing fails
 */
public void writeXmlToFile(String fileName, String charset) throws IOException {
writeToFile(fileName, charset, WRITE_METHOD_SIMPLE);
}
/**
 * Writes the document to the named file with {@code WRITE_METHOD_COMPACT}, using the
 * platform default charset.
 *
 * @param fileName path of the file to write
 * @throws IOException if the file cannot be opened or writing fails
 */
public void writeCompactXmlToFile(String fileName) throws IOException {
writeToFile(fileName, WRITE_METHOD_COMPACT);
}
/**
 * Writes the document to the named file with {@code WRITE_METHOD_COMPACT}, using the
 * named charset.
 *
 * @param fileName path of the file to write
 * @param charset  name of the charset used for encoding
 * @throws IOException if the file cannot be opened, the charset is unsupported,
 *                     or writing fails
 */
public void writeCompactXmlToFile(String fileName, String charset) throws IOException {
writeToFile(fileName, charset, WRITE_METHOD_COMPACT);
}
/**
 * Writes the document to the named file with {@code WRITE_METHOD_PRETTY}, using the
 * platform default charset.
 *
 * @param fileName path of the file to write
 * @throws IOException if the file cannot be opened or writing fails
 */
public void writePrettyXmlToFile(String fileName) throws IOException {
writeToFile(fileName, WRITE_METHOD_PRETTY);
}
/**
 * Writes the document to the named file with {@code WRITE_METHOD_PRETTY}, using the
 * named charset.
 *
 * @param fileName path of the file to write
 * @param charset  name of the charset used for encoding
 * @throws IOException if the file cannot be opened, the charset is unsupported,
 *                     or writing fails
 */
public void writePrettyXmlToFile(String fileName, String charset) throws IOException {
writeToFile(fileName, charset, WRITE_METHOD_PRETTY);
}
/**
 * Serializes the document with {@code WRITE_METHOD_SIMPLE} into memory.
 *
 * @return the produced XML text
 * @throws IOException if the serializer fails
 */
public String getXmlAsString() throws IOException {
    final StringWriter sink = new StringWriter();
    writeXml(sink, WRITE_METHOD_SIMPLE);
    return sink.toString();
}
/**
 * Serializes the document with {@code WRITE_METHOD_COMPACT} into memory.
 *
 * @return the produced XML text
 * @throws IOException if the serializer fails
 */
public String getCompactXmlAsString() throws IOException {
    final StringWriter sink = new StringWriter();
    writeXml(sink, WRITE_METHOD_COMPACT);
    return sink.toString();
}
/**
 * Serializes the document with {@code WRITE_METHOD_PRETTY} into memory.
 *
 * @return the produced XML text
 * @throws IOException if the serializer fails
 */
public String getPrettyXmlAsString() throws IOException {
    final StringWriter sink = new StringWriter();
    writeXml(sink, WRITE_METHOD_PRETTY);
    return sink.toString();
}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?