📄 parserutils.java
字号:
}

    /**
     * Trim all the given tags, together with their content, from the input string.
     * <BR>Equivalent to <code>trimTags (input, tags, true, true)</code>.
     * @param input The string in input.
     * @param tags The names of the tags to be removed.
     * @return The string without the tags and without their content.
     * @see ParserUtils#trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, String[] tags)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, tags, true, true);
    }

    /**
     * Trim all the given tags from the input string and return a string
     * like the input one without the tags and (optionally) their content.
     * <BR>The tag names are processed one after the other: the result of
     * trimming <code>tags[0]</code> is the input for trimming <code>tags[1]</code>, and so on.
     * <BR>For example if you call trimTags("&lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK", new String[] {"DIV"}),
     * <BR>you obtain a string " ALL OK" as output (trimmed DIV tags and their content recursively).
     * <BR>With (false, false) as last arguments you obtain "&lt;DIV&gt; +12.5 &lt;/DIV&gt; ALL OK"
     * (only the outermost DIV tags are trimmed, their content is kept).
     * <BR>With (true, false) every DIV begin and end tag is trimmed and the text between them is kept.
     * <BR>With (false, true) you obtain " ALL OK" (the outermost DIV is trimmed with its whole content).
     * <BR>An empty <code>tags</code> array leaves the input unchanged.
     * @param input The string in input.
     * @param tags The names of the tags to be removed.
     * @param recursive If true the tags nested inside another matching tag are handled too,
     * if false only the matching tags not contained in another matching tag are handled.
     * @param insideTag If true delete also the content of the tags.
     * @return The string without tags.
     */
    public static String trimTags (String input, String[] tags, boolean recursive, boolean insideTag)
        throws ParserException, UnsupportedEncodingException
    {
        // Start from the input itself so that "nothing to trim" returns it untouched
        // (returning the working buffer would yield "" for an empty tags array).
        String result = input;
        for (int i = 0; i < tags.length; i++)
        {
            NodeFilter filter = new TagNameFilter (tags[i]);
            result = trimTags (result, filter, recursive, insideTag);
        }
        return result;
    }

    /**
     * Trim all the tags of the given node class, together with their content, from the input string.
     * <BR>Use Class class as input parameter instead of tags[] string array.
     * @param input The string in input.
     * @param nodeType The class of the nodes to be removed.
     * @return The string without the tags and without their content.
     * @see ParserUtils#trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, Class nodeType)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, new NodeClassFilter (nodeType), true, true);
    }

    /**
     * Trim all the tags of the given node class from the input string,
     * optionally together with their content.
     * <BR>Use Class class as input parameter instead of tags[] string array.
     * @param input The string in input.
     * @param nodeType The class of the nodes to be removed.
     * @param recursive If true the tags nested inside another matching tag are handled too.
     * @param insideTag If true delete also the content of the tags.
     * @return The string without tags.
     * @see ParserUtils#trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, Class nodeType, boolean recursive, boolean insideTag)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, new NodeClassFilter (nodeType), recursive, insideTag);
    }

    /**
     * Trim all the tags accepted by the filter, together with their content, from the input string.
     * <BR>Use NodeFilter class as input parameter instead of tags[] string array.
     * @param input The string in input.
     * @param filter The filter selecting the tags to be removed.
     * @return The string without the tags and without their content.
     * @see ParserUtils#trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, NodeFilter filter)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, filter, true, true);
    }

    /**
     * Trim all the tags accepted by the filter from the input string,
     * optionally together with their content.
     * <BR>Use NodeFilter class as input parameter instead of tags[] string array.
     * <BR>Every node accepted by the filter is cast to CompositeTag, so a filter
     * accepting any other kind of node makes this method fail with a ClassCastException.
     * @param input The string in input.
     * @param filter The filter selecting the tags to be removed.
     * @param recursive If true the tags nested inside another matching tag are handled too.
     * @param insideTag If true delete also the content of the tags.
     * @return The string without tags.
     * @see ParserUtils#trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, NodeFilter filter, boolean recursive, boolean insideTag)
        throws ParserException, UnsupportedEncodingException
    {
        // The mask holds one character per input character:
        // ' ' means "keep this character", '*' means "drop it".
        String mask = createDummyString (' ', input.length ());

        NodeList links = getLinks (input, filter, recursive);
        for (int j = 0; j < links.size (); j++)
        {
            CompositeTag beginTag = (CompositeTag)links.elementAt (j);
            Tag endTag = beginTag.getEndTag ();

            // positions of begin and end tags
            int openStart = beginTag.getStartPosition ();
            int openEnd = beginTag.getEndPosition ();
            int closeStart = endTag.getStartPosition ();
            int closeEnd = endTag.getEndPosition ();

            if (insideTag)
            {
                // drop everything from the start of the begin tag to the end of the end tag
                mask = modifyDummyString (mask, openStart, closeEnd);
            }
            else
            {
                // drop only the two tags, keep what lies between them
                mask = modifyDummyString (mask, openStart, openEnd);
                mask = modifyDummyString (mask, closeStart, closeEnd);
            }
        }

        // Copy every run of "keep" characters to the output.
        StringBuffer output = new StringBuffer ();
        int keepFrom = mask.indexOf (' ');
        while (keepFrom != -1)
        {
            int dropFrom = mask.indexOf ('*', keepFrom);
            if (dropFrom == -1)
            {
                // no more dropped regions: the rest of the input is kept
                output.append (input.substring (keepFrom, mask.length ()));
                break;
            }
            output.append (input.substring (keepFrom, dropFrom));
            keepFrom = mask.indexOf (' ', dropFrom);
        }
        return output.toString ();
    }

    /**
     * Create a Parser Object having a String Object as input (instead of a url or a string representing the url location).
     * <BR>The string will be parsed as it would be a file.
     * @param input The string in input.
     * @return The Parser Object with the string as input stream.
     */
    public static Parser createParserParsingAnInputString (String input)
        throws ParserException, UnsupportedEncodingException
    {
        Parser parser = new Parser ();
        Lexer lexer = new Lexer ();
        Page page = new Page (input);
        lexer.setPage (page);
        parser.setLexer (lexer);
        return parser;
    }

    /**
     * Parse the string and collect the tags having the given name.
     * @param output The string to be parsed.
     * @param tag The name of the tags to collect.
     * @param recursive If false the tags contained in another collected tag are discarded.
     * @return The list of the collected tags.
     */
    private static NodeList getLinks (String output, String tag, boolean recursive)
        throws ParserException, UnsupportedEncodingException
    {
        NodeFilter filterLink = new TagNameFilter (tag);
        return getLinks (output, filterLink, recursive);
    }

    /**
     * Parse the string and collect the tags accepted by the filter.
     * @param output The string to be parsed.
     * @param filter The filter selecting the tags to collect.
     * @param recursive If false the tags contained in another collected tag are discarded.
     * @return The list of the collected tags.
     */
    private static NodeList getLinks (String output, NodeFilter filter, boolean recursive)
        throws ParserException, UnsupportedEncodingException
    {
        Parser parser = createParserParsingAnInputString (output);
        NodeList links = parser.extractAllNodesThatMatch (filter);

        // With the 'not recursive option' you have only the tag
        // containers and not the tags contained in them.
        if (!recursive)
        {
            dropNestedLinks (links);
        }
        return links;
    }

    /**
     * Remove from the list every tag lying strictly inside another tag of the same list.
     * <BR>Every element of the list is cast to CompositeTag.
     * @param links The list to be pruned in place.
     */
    private static void dropNestedLinks (NodeList links)
    {
        for (int j = 0; j < links.size (); j++)
        {
            CompositeTag outer = (CompositeTag)links.elementAt (j);
            int outerBegin = outer.getStartPosition ();
            int outerEnd = outer.getEndTag ().getEndPosition ();

            for (int k = 0; k < links.size (); k++)
            {
                if (k == j)
                {
                    continue;
                }
                CompositeTag inner = (CompositeTag)links.elementAt (k);
                int innerBegin = inner.getStartPosition ();
                int innerEnd = inner.getEndTag ().getEndPosition ();

                if ((innerBegin > outerBegin) && (innerEnd < outerEnd))
                {
                    links.remove (k);
                    // The container only shifts left when the removed element
                    // was in front of it; decrementing j unconditionally would
                    // drive it to a negative index after several removals.
                    if (k < j)
                    {
                        j--;
                    }
                    // re-examine the element that slid into position k
                    k--;
                }
            }
        }
    }

    /**
     * Create a string made of the same character repeated.
     * @param fillingChar The character to repeat.
     * @param length The number of repetitions; zero or less gives an empty string.
     * @return The filled string.
     */
    private static String createDummyString (char fillingChar, int length)
    {
        if (length <= 0)
        {
            return "";
        }
        char[] filled = new char[length];
        for (int j = 0; j < length; j++)
        {
            filled[j] = fillingChar;
        }
        return new String (filled);
    }

    /**
     * Mark the interval [beginTag, endTag) of the dummy string with '*' characters.
     * @param dummyString The mask to be modified (it is not changed, a new string is returned).
     * @param beginTag The index of the first character to mark.
     * @param endTag The index following the last character to mark.
     * @return The new mask.
     */
    private static String modifyDummyString (String dummyString, int beginTag, int endTag)
    {
        String dummyStringInterval = createDummyString ('*', endTag - beginTag);
        return dummyString.substring (0, beginTag)
            + dummyStringInterval
            + dummyString.substring (endTag, dummyString.length ());
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -