htmlparser.java

来自「java 写的一个新闻发布系统」· Java 代码 · 共 194 行

JAVA
194
字号
//
//                                   ____.
//                       __/\ ______|    |__/\.     _______
//            __   .____|    |       \   |    +----+       \
//    _______|  /--|    |    |    -   \  _    |    :    -   \_________
//   \\______: :---|    :    :           |    :    |         \________>
//           |__\---\_____________:______:    :____|____:_____\
//                                      /_____|
//
//                 . . . i n   j a h i a   w e   t r u s t . . .
//
//
//--------------------------
// HTMLParser
//--------------------------
// Jerome Bedat 14.12.2000
//--------------------------

package jahiatemplates.org.jahia.portlets_api;

import java.io.*;
import java.util.*;

import org.apache.regexp.*;         // Regular Expression
import org.jahia.utils.*;           // JahiaConsole

/**
 * Singleton helper that rewrites HTML fragments so they can be embedded in
 * DHTML / JavaScript string literals.
 *
 * <p>The tags stripped by {@link #getDHTML(String)} are accumulated in a single
 * buffer owned by the singleton and handed out (and cleared) by
 * {@link #getScripts()}. Because that buffer is shared by every caller of
 * {@link #getInstance()}, callers that interleave {@code getDHTML} /
 * {@code getScripts} pairs will see each other's tags.
 *
 * @author Jerome Bedat
 */
public class HTMLParser
{
    /** The lazily created single instance. */
    private static HTMLParser theObject = null;

    /** Tags collected by getDHTML since the last call to getScripts. */
    private String theScripts = "";


    /**
     * HTMLParser
     *
     * Private so that instances are only obtained through getInstance;
     * logs a message on the Jahia console when the parser is created.
     *
     * @author Jerome Bedat
     *
     */
    private HTMLParser()
    {
        JahiaConsole.println( "HTML Parser", "Parsing..." );
    } // end Constructor


    /**
     * getInstance
     *
     * Returns the shared parser, creating it on first use.
     *
     * @return the single HTMLParser instance
     * @author Jerome Bedat
     *
     */
    public static synchronized HTMLParser getInstance()
    {
        if (theObject == null)
        {
            theObject = new HTMLParser();
        }
        return theObject;
    } // end getInstance


    /**
     * getDHTML  (delete new lines, move script / style / link tags to the
     * scripts buffer, drop "<!...>" declarations and replace ' with \')
     *
     * Every script, style and link tag found is appended to the scripts
     * buffer (all scripts first, then all styles, then all links) before
     * being removed from the returned markup. Double quotes are left
     * untouched.
     *
     * @param oldHTML the HTML to convert; may be null
     * @return the converted HTML, or null when oldHTML is null. If a pattern
     *         fails to compile the error is logged and the text converted so
     *         far is returned.
     * @author Jerome Bedat
     *
     */
    public String getDHTML(String oldHTML)
    {
        if (oldHTML == null)
        {
            return null;
        }

        // Local working copy: the parser is a shared singleton, so per-call
        // state must not live in instance fields.
        String html = oldHTML;
        try
        {
            // Line breaks first, so that the tag patterns below can span
            // what used to be several lines.
            html = new RE("\\n").subst(html, "");
            html = new RE("\\r").subst(html, "");

            html = extractAndRemove(html, new RE("<script(.*?)</script>", RE.MATCH_CASEINDEPENDENT));
            html = extractAndRemove(html, new RE("<style(.*?)</style>", RE.MATCH_CASEINDEPENDENT));
            html = extractAndRemove(html, new RE("<link(.*?)>", RE.MATCH_CASEINDEPENDENT));

            html = new RE("<!(.*?)>").subst(html, "");

            html = new RE("\'").subst(html, "\\'");
        }
        catch (RESyntaxException e)
        {
            JahiaConsole.println("HTML Parser",e.toString());
        }
        return html;
    } // end getDHTML


    /**
     * getQuoteString  (replace ' with \')
     *
     * Double quotes are left untouched.
     *
     * @param oldHTML the text to escape; may be null
     * @return the escaped text, or null when oldHTML is null
     * @author Jerome Bedat
     *
     */
    public String getQuoteString(String oldHTML)
    {
        if (oldHTML == null)
        {
            return null;
        }

        String quoted = oldHTML;
        try
        {
            quoted = new RE("\'").subst(quoted, "\\'");
        }
        catch (RESyntaxException e)
        {
            JahiaConsole.println("HTML Parser",e.toString());
        }
        return quoted;
    } // end getQuoteString


    /**
     * getHTMLString  (replace " " with "&nbsp;")
     *
     * @param oldString the text to convert; may be null
     * @return the text with every space replaced, or null when oldString is null
     * @author Jerome Bedat
     *
     */
    public String getHTMLString(String oldString)
    {
        if (oldString == null)
        {
            return null;
        }

        String converted = oldString;
        try
        {
            converted = new RE(" ").subst(converted, "&nbsp;");
        }
        catch (RESyntaxException e)
        {
            JahiaConsole.println("HTML Parser",e.toString());
        }
        return converted;
    } // end getHTMLString


    /**
     * extractAndRemove  (append every match of a tag pattern to the scripts
     * buffer, then delete all of them from the text)
     *
     * @param html the text to scan
     * @param tag  the compiled pattern describing the tag to move
     * @return html with every match of tag removed
     */
    private String extractAndRemove(String html, RE tag)
    {
        int pos = 0;
        while (pos < html.length() && tag.match(html, pos))
        {
            String found = tag.getParen(0);
            if (found != null) { setScripts(found); }

            // Resume after this match; always advance by at least one
            // character so an empty match cannot loop forever.
            int next = tag.getParenEnd(0);
            pos = (next > pos) ? next : pos + 1;
        }
        return tag.subst(html, "");
    } // end extractAndRemove


    /**
     * setScripts  (Append the content of a tag to the single scripts variable)
     *
     * @param theScript the tag text to append
     * @author Jerome Bedat
     *
     */
    private void setScripts(String theScript) {
        theScripts += theScript;
    } // end setScripts


    /**
     * getScripts  (Get the collected tags and reset the buffer)
     *
     * @return everything collected since the previous call; empty string if nothing
     * @author Jerome Bedat
     *
     */
    public String getScripts() {
        String collected = theScripts;
        theScripts = "";
        return collected;
    } // end getScripts

}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?