⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexertests.java

📁 html to xml convertor
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
//        int nodecount;//        Node node;//        int charcount;////        url = new URL (link);//        connection = url.openConnection ();//        connection.connect ();//        source = new Source (new Stream (connection.getInputStream ()));//        buffer = new StringBuffer (350000);//        while (-1 != (i = source.read ()))//            buffer.append ((char)i);//        source.close ();//        html = buffer.toString ();//        old_total = 0;//        new_total = 0;//        for (i = 0; i < 5; i++)//        {//            System.gc ();//            begin = System.currentTimeMillis ();//            Lexer lexer = new Lexer (html);//            nodecount = 0;//            while (null != (node = lexer.nextNode ()))//                nodecount++;//            end = System.currentTimeMillis ();//            System.out.println ("     lexer: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                new_total += (end - begin);////            System.gc ();//            begin = System.currentTimeMillis ();//            reader = new StringReader (html);//            nodes =  new NodeReader (new BufferedReader (reader), 350000);//            parser = new Parser (nodes, null);//            nodecount = 0;//            while (null != (node = nodes.readElement ()))//                nodecount++;//            end = System.currentTimeMillis ();//            System.out.println ("old reader: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                old_total += (end - begin);//        }//        assertTrue ("old parser is" + ((double)(new_total - old_total)/(double)old_total*100.0) + "% faster", new_total < old_total);//        System.out.println ("lexer is " + ((double)(old_total - new_total)/(double)old_total*100.0) + "% faster");//    }////    /**//     * Test the relative speed reading from a string parsing tags 
too.//     *///    public void testSpeedStringWithTags () throws ParserException, IOException//    {//        final String link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";//        URL url;//        URLConnection connection;//        Source source;//        StringBuffer buffer;//        int i;//        String html;////        long old_total;//        long new_total;//        long begin;//        long end;//        StringReader reader;//        NodeReader nodes;//        Parser parser;//        int nodecount;//        Node node;//        int charcount;////        url = new URL (link);//        connection = url.openConnection ();//        connection.connect ();//        source = new Source (new Stream (connection.getInputStream ()));//        buffer = new StringBuffer (350000);//        while (-1 != (i = source.read ()))//            buffer.append ((char)i);//        source.close ();//        html = buffer.toString ();//        old_total = 0;//        new_total = 0;//        for (i = 0; i < 5; i++)//        {//            System.gc ();//            begin = System.currentTimeMillis ();//            Lexer lexer = new Lexer (html);//            nodecount = 0;//            while (null != (node = lexer.nextNode ()))//            {//                nodecount++;//                if (node instanceof TagNode)//                    ((TagNode)node).getAttributes ();//            }//            end = System.currentTimeMillis ();//            System.out.println ("     lexer: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                new_total += (end - begin);////            System.gc ();//            begin = System.currentTimeMillis ();//            reader = new StringReader (html);//            nodes =  new NodeReader (new BufferedReader (reader), 350000);//            parser = new Parser (nodes, null);//            nodecount = 0;//            while (null != (node = nodes.readElement 
()))//            {//                nodecount++;//                if (node instanceof Tag)//                    ((Tag)node).getAttributes ();//            }//            end = System.currentTimeMillis ();//            System.out.println ("old reader: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                old_total += (end - begin);//        }//        assertTrue ("old parser is" + ((double)(new_total - old_total)/(double)old_total*100.0) + "% faster", new_total < old_total);//        System.out.println ("lexer is " + ((double)(old_total - new_total)/(double)old_total*100.0) + "% faster");//    }////    public void testSpeedStreamWithoutTags () throws ParserException, IOException//    {//        final String link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";//        URL url;//        URLConnection connection;//        Source source;//        StringBuffer buffer;//        int i;//        String html;//        InputStream stream;////        long old_total;//        long new_total;//        long begin;//        long end;//        InputStreamReader reader;//        NodeReader nodes;//        Parser parser;//        int nodecount;//        Node node;//        int charcount;////        url = new URL (link);//        connection = url.openConnection ();//        connection.connect ();//        source = new Source (new Stream (connection.getInputStream ()));//        buffer = new StringBuffer (350000);//        while (-1 != (i = source.read ()))//            buffer.append ((char)i);//        source.close ();//        html = buffer.toString ();//        old_total = 0;//        new_total = 0;////        for (i = 0; i < 5; i++)//        {////            System.gc ();//            begin = System.currentTimeMillis ();//            stream = new ByteArrayInputStream (html.getBytes (Page.DEFAULT_CHARSET));//            Lexer lexer = new Lexer (new Page (stream, 
Page.DEFAULT_CHARSET));//            nodecount = 0;//            while (null != (node = lexer.nextNode ()))//                nodecount++;//            end = System.currentTimeMillis ();//            System.out.println ("     lexer: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                new_total += (end - begin);////            System.gc ();//            begin = System.currentTimeMillis ();//            stream = new ByteArrayInputStream (html.getBytes (Page.DEFAULT_CHARSET));//            reader = new InputStreamReader (stream);//            nodes =  new NodeReader (reader, 350000);//            parser = new Parser (nodes, null);//            nodecount = 0;//            while (null != (node = nodes.readElement ()))//                nodecount++;//            end = System.currentTimeMillis ();//            System.out.println ("old reader: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                old_total += (end - begin);////        }//        assertTrue ("old parser is" + ((double)(new_total - old_total)/(double)old_total*100.0) + "% faster", new_total < old_total);//        System.out.println ("lexer is " + ((double)(old_total - new_total)/(double)old_total*100.0) + "% faster");//    }////    public void testSpeedStreamWithTags () throws ParserException, IOException//    {//        final String link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";//        URL url;//        URLConnection connection;//        Source source;//        StringBuffer buffer;//        int i;//        String html;//        InputStream stream;////        long old_total;//        long new_total;//        long begin;//        long end;//        InputStreamReader reader;//        NodeReader nodes;//        Parser parser;//        int nodecount;//        Node node;//        int charcount;////        url = new URL (link);//   
     connection = url.openConnection ();//        connection.connect ();//        source = new Source (new Stream (connection.getInputStream ()));//        buffer = new StringBuffer (350000);//        while (-1 != (i = source.read ()))//            buffer.append ((char)i);//        source.close ();//        html = buffer.toString ();//        old_total = 0;//        new_total = 0;////        for (i = 0; i < 5; i++)//        {////            System.gc ();//            begin = System.currentTimeMillis ();//            stream = new ByteArrayInputStream (html.getBytes (Page.DEFAULT_CHARSET));//            Lexer lexer = new Lexer (new Page (stream, Page.DEFAULT_CHARSET));//            nodecount = 0;//            while (null != (node = lexer.nextNode ()))//            {//                nodecount++;//                if (node instanceof TagNode)//                    ((TagNode)node).getAttributes ();//            }//            end = System.currentTimeMillis ();//            System.out.println ("     lexer: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                new_total += (end - begin);////            System.gc ();//            begin = System.currentTimeMillis ();//            stream = new ByteArrayInputStream (html.getBytes (Page.DEFAULT_CHARSET));//            reader = new InputStreamReader (stream);//            nodes =  new NodeReader (reader, 350000);//            parser = new Parser (nodes, null);//            nodecount = 0;//            while (null != (node = nodes.readElement ()))//            {//                nodecount++;//                if (node instanceof Tag)//                    ((Tag)node).getAttributes ();//            }//            end = System.currentTimeMillis ();//            System.out.println ("old reader: " + (end - begin) + " msec, " + nodecount + " nodes");//            if (0 != i) // the first timing is way different//                old_total += (end - begin);//   
     }//        assertTrue ("old parser is" + ((double)(new_total - old_total)/(double)old_total*100.0) + "% faster", new_total < old_total);//        System.out.println ("lexer is " + ((double)(old_total - new_total)/(double)old_total*100.0) + "% faster");//    }//    public static void main (String[] args) throws ParserException, IOException//    {//        LexerTests tests = new LexerTests ("hallow");//        tests.testSpeedStreamWithTags ();//    }    static final HashSet mAcceptable;    static    {        mAcceptable = new HashSet ();        mAcceptable.add ("A");        mAcceptable.add ("BODY");        mAcceptable.add ("BR");        mAcceptable.add ("CENTER");        mAcceptable.add ("FONT");        mAcceptable.add ("HEAD");        mAcceptable.add ("HR");        mAcceptable.add ("HTML");        mAcceptable.add ("IMG");        mAcceptable.add ("P");        mAcceptable.add ("TABLE");        mAcceptable.add ("TD");        mAcceptable.add ("TITLE");        mAcceptable.add ("TR");        mAcceptable.add ("META");        mAcceptable.add ("STRONG");        mAcceptable.add ("FORM");        mAcceptable.add ("INPUT");        mAcceptable.add ("!DOCTYPE");        mAcceptable.add ("TBODY");        mAcceptable.add ("B");        mAcceptable.add ("DIV");        mAcceptable.add ("SCRIPT");        mAcceptable.add ("NOSCRIPT");        mAcceptable.add ("STYLE");        mAcceptable.add ("SPAN");        mAcceptable.add ("UL");        mAcceptable.add ("LI");        mAcceptable.add ("IFRAME");        mAcceptable.add ("LINK");        mAcceptable.add ("H1");        mAcceptable.add ("H3");        mAcceptable.add ("OBJECT");        mAcceptable.add ("PARAM");        mAcceptable.add ("EMBED");    }    /**     * Test case for bug #789439 Japanese page causes OutOfMemory Exception     * No exception is thrown in the current version of the parser,     * however, the problem is that ISO-2022-JP (aka JIS) encoding sometimes     * causes spurious tags.     
* The root cause is characters bracketed by [esc]$B and [esc](J (contrary
     * to what is indicated in the j_s_nightingale analysis of the problem) that
     * sometimes have an angle bracket (&lt; or 0x3c) embedded in them. These
     * are taken to be tags by the parser, instead of being considered strings.
     * <p>
     * The URL referenced has an ISO-8859-1 encoding (the default), but
     * Japanese characters intermixed on the page with English, using the JIS

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -