⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parserutils.java

📁 html 解析处理代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    }

    /**
     * Trim all tags in the input string and
     * return a string like the input one
     * without the tags and their content.
     * <BR>Shorthand for {@code trimTags (input, tags, true, true)}.
     * @param input The string in input.
     * @param tags The names of the tags to be removed.
     * @return The string without the tags and their content.
     * @see #trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, String[] tags)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, tags, true, true);
    }

    /**
     * Trim all tags in the input string and
     * return a string like the input one
     * without the tags and their content (optional).
     * <BR>For example if you call {@code trimTags("<DIV><DIV>  +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"})},
     * <BR>you obtain a string {@code " ALL OK"} as output (trimmed DIV tags and their content recursively).
     * <BR>For example if you call {@code trimTags("<DIV><DIV>  +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, false, false)},
     * <BR>you obtain a string {@code "<DIV>  +12.5 </DIV> ALL OK"} as output (trimmed DIV tags and not their content and not recursively).
     * <BR>For example if you call {@code trimTags("<DIV><DIV>  +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, true, false)},
     * <BR>you obtain a string {@code "  +12.5  ALL OK"} as output (trimmed DIV tags and not their content recursively).
     * <BR>For example if you call {@code trimTags("<DIV><DIV>  +12.5 </DIV></DIV> ALL OK", new String[] {"DIV"}, false, true)},
     * <BR>you obtain a string {@code " ALL OK"} as output (trimmed DIV tags and their content).
     * <BR>The tag names are processed one after the other; each one is trimmed
     * from the result of the previous pass. An empty {@code tags} array leaves
     * the input untouched.
     * @param input The string in input.
     * @param tags The tags to be removed.
     * @param recursive Optional parameter (true if not present), if true delete all the tags recursively.
     * @param insideTag Optional parameter (true if not present), if true delete also the content of the tags.
     * @return The string without tags.
     */
    public static String trimTags (String input, String[] tags, boolean recursive, boolean insideTag)
        throws ParserException, UnsupportedEncodingException
    {
        // Start from the input itself so that an empty tag list returns the
        // input unchanged instead of an empty string.
        String result = input;

        // loop inside the different tags to be trimmed; the text is re-parsed
        // for every tag name so the reported positions match the current text
        for (int i = 0; i < tags.length; i++)
        {
            NodeList links = getLinks (result, tags[i], recursive);
            result = stripTagRegions (result, links, insideTag);
        }

        return result;
    }

    /**
     * Trim all tags in the input string and
     * return a string like the input one
     * without the tags and their content.
     * <BR>Use Class class as input parameter
     * instead of tags[] string array.
     * @param input The string in input.
     * @param nodeType The class of the nodes to be removed (wrapped in a NodeClassFilter).
     * @return The string without the matching tags and their content.
     * @see #trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, Class nodeType)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, new NodeClassFilter (nodeType), true, true);
    }

    /**
     * Trim all tags in the input string and
     * return a string like the input one
     * without the tags and their content (optional).
     * <BR>Use Class class as input parameter
     * instead of tags[] string array.
     * @param input The string in input.
     * @param nodeType The class of the nodes to be removed (wrapped in a NodeClassFilter).
     * @param recursive If true delete all the tags recursively.
     * @param insideTag If true delete also the content of the tags.
     * @return The string without the matching tags.
     * @see #trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, Class nodeType, boolean recursive, boolean insideTag)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, new NodeClassFilter (nodeType), recursive, insideTag);
    }

    /**
     * Trim all tags in the input string and
     * return a string like the input one
     * without the tags and their content.
     * <BR>Use NodeFilter class as input parameter
     * instead of tags[] string array.
     * @param input The string in input.
     * @param filter The filter selecting the tags to be removed.
     * @return The string without the matching tags and their content.
     * @see #trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, NodeFilter filter)
        throws ParserException, UnsupportedEncodingException
    {
        return trimTags (input, filter, true, true);
    }

    /**
     * Trim all tags in the input string and
     * return a string like the input one
     * without the tags and their content (optional).
     * <BR>Use NodeFilter class as input parameter
     * instead of tags[] string array.
     * @param input The string in input.
     * @param filter The filter selecting the tags to be removed.
     * @param recursive If true delete all the tags recursively.
     * @param insideTag If true delete also the content of the tags.
     * @return The string without the matching tags.
     * @see #trimTags(String, String[], boolean, boolean)
     */
    public static String trimTags (String input, NodeFilter filter, boolean recursive, boolean insideTag)
        throws ParserException, UnsupportedEncodingException
    {
        NodeList links = getLinks (input, filter, recursive);
        return stripTagRegions (input, links, insideTag);
    }

    /**
     * Create a Parser Object having a String Object as input (instead of a url or a string representing the url location).
     * <BR>The string will be parsed as it would be a file.
     * @param input The string in input.
     * @return The Parser Object with the string as input stream.
     */
    public static Parser createParserParsingAnInputString (String input)
        throws ParserException, UnsupportedEncodingException
    {
        Parser parser = new Parser();
        Lexer lexer = new Lexer();
        Page page = new Page(input);
        lexer.setPage(page);
        parser.setLexer(lexer);

        return parser;
    }

    /**
     * Collect the tags with the given name found in the string.
     * @param output The string to be parsed.
     * @param tag The name of the tags to look for (wrapped in a TagNameFilter).
     * @param recursive If false, tags lying strictly inside another returned tag are dropped.
     * @return The list of the matching tags.
     */
    private static NodeList getLinks (String output, String tag, boolean recursive)
        throws ParserException, UnsupportedEncodingException
    {
        NodeFilter filterLink = new TagNameFilter (tag);
        return getLinks (output, filterLink, recursive);
    }

    /**
     * Collect the tags accepted by the filter found in the string.
     * @param output The string to be parsed.
     * @param filter The filter selecting the tags to look for.
     * @param recursive If false, tags lying strictly inside another returned tag are dropped.
     * @return The list of the matching tags.
     */
    private static NodeList getLinks (String output, NodeFilter filter, boolean recursive)
        throws ParserException, UnsupportedEncodingException
    {
        Parser parser = createParserParsingAnInputString (output);
        NodeList links = parser.extractAllNodesThatMatch (filter);

        // remove tags added recursively
        // so if you have selected 'not recursive option'
        // you have only the tag container and not the contained tags.
        if (!recursive)
        {
            dropNestedTags (links);
        }

        return links;
    }

    /**
     * Remove from the list every tag that lies strictly inside another tag of
     * the same list (it starts after the container starts and its end tag
     * finishes before the container's end tag finishes).
     * <BR>The list is walked from the last index down, so removing an element
     * never shifts an index that still has to be visited. The previous
     * implementation decremented the outer index on every removal and could
     * run it below zero when a container held several nested tags.
     * @param links The list to be pruned in place.
     */
    private static void dropNestedTags (NodeList links)
    {
        for (int k = links.size () - 1; k >= 0; k--)
        {
            CompositeTag candidate = (CompositeTag)links.elementAt (k);
            int candidateBegin = candidate.getStartPosition ();
            int candidateEnd = candidate.getEndTag ().getEndPosition ();

            for (int j = 0; j < links.size (); j++)
            {
                if (j == k)
                {
                    continue;
                }
                CompositeTag container = (CompositeTag)links.elementAt (j);
                int containerBegin = container.getStartPosition ();
                int containerEnd = container.getEndTag ().getEndPosition ();
                if ((candidateBegin > containerBegin) && (candidateEnd < containerEnd))
                {
                    // Strict containment is transitive, so it does not matter
                    // whether the container itself gets removed later on.
                    links.remove (k);
                    break;
                }
            }
        }
    }

    /**
     * Return a copy of the input without the characters covered by the tags.
     * @param input The string the tag positions refer to.
     * @param links The tags to be cut out of the input.
     * @param insideTag If true the whole span from the start of the begin tag
     * to the end of the end tag is removed; if false only the begin tag and
     * the end tag themselves are removed and the text between them is kept.
     * @return The input without the marked characters.
     */
    private static String stripTagRegions (String input, NodeList links, boolean insideTag)
    {
        // one flag per character of the input: true means 'drop it'
        boolean[] removed = new boolean[input.length ()];

        for (int j = 0; j < links.size (); j++)
        {
            // NOTE(review): every matched node is cast to CompositeTag and its
            // end tag is dereferenced, exactly as before - a filter matching
            // other node kinds would fail here; confirm against callers.
            CompositeTag beginTag = (CompositeTag)links.elementAt (j);
            Tag endTag = beginTag.getEndTag ();

            if (insideTag)
            {
                markRegion (removed, beginTag.getStartPosition (), endTag.getEndPosition ());
            }
            else
            {
                markRegion (removed, beginTag.getStartPosition (), beginTag.getEndPosition ());
                markRegion (removed, endTag.getStartPosition (), endTag.getEndPosition ());
            }
        }

        StringBuffer output = new StringBuffer (input.length ());
        for (int i = 0; i < removed.length; i++)
        {
            if (!removed[i])
            {
                output.append (input.charAt (i));
            }
        }

        return output.toString ();
    }

    /**
     * Flag the characters in the half-open range [begin, end) as removed.
     * Positions outside the array are ignored.
     * @param removed The flags, one per character.
     * @param begin The first position to flag.
     * @param end The position after the last one to flag.
     */
    private static void markRegion (boolean[] removed, int begin, int end)
    {
        int from = Math.max (begin, 0);
        int to = Math.min (end, removed.length);
        for (int i = from; i < to; i++)
        {
            removed[i] = true;
        }
    }

    /**
     * Build a string made of the same character repeated.
     * @param fillingChar The character to repeat.
     * @param length The number of repetitions; zero or less gives an empty string.
     * @return The filled string.
     */
    private static String createDummyString (char fillingChar, int length)
    {
        StringBuffer dummyStringBuffer = new StringBuffer (Math.max (length, 0));
        for (int j = 0; j < length; j++)
        {
            dummyStringBuffer.append (fillingChar);
        }
        return dummyStringBuffer.toString ();
    }

    /**
     * Replace the characters in the half-open range [beginTag, endTag) of the
     * dummy string with '*' markers.
     * @param dummyString The string to be marked.
     * @param beginTag The first position to mark.
     * @param endTag The position after the last one to mark.
     * @return The marked copy of the dummy string.
     */
    private static String modifyDummyString (String dummyString, int beginTag, int endTag)
    {
        String dummyStringInterval = createDummyString ('*', endTag - beginTag);
        return dummyString.substring (0, beginTag) + dummyStringInterval + dummyString.substring (endTag);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -