📄 lexertests.java

📁 htmlparser1.6,对HTML语言的解析,而且对每个标签都基本上有相应的解析类,非常不错.
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
1 2 3 下一页
// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML// http://sourceforge.org/projects/htmlparser// Copyright (C) 2004 Derrick Oswald//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v $// $Author: derrickoswald $// $Date: 2006/05/27 17:06:28 $// $Revision: 1.31 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.tests.lexerTests;import java.io.IOException;import java.net.URL;import java.util.HashSet;import org.htmlparser.Node;import org.htmlparser.Parser;import org.htmlparser.Remark;import org.htmlparser.Tag;import org.htmlparser.Text;import org.htmlparser.lexer.Lexer;import org.htmlparser.tags.ScriptTag;import org.htmlparser.tags.StyleTag;import org.htmlparser.tests.ParserTestCase;import org.htmlparser.util.EncodingChangeException;import org.htmlparser.util.NodeIterator;import org.htmlparser.util.NodeList;import org.htmlparser.util.ParserException;public class LexerTests extends ParserTestCase{    static    {        System.setProperty ("org.htmlparser.tests.lexerTests.LexerTests", "LexerTests");    }    /**     * Test the Lexer class.     */    public LexerTests (String name)    {        super (name);    }    /**     * Test operation without tags.     
*/
    public void testPureText () throws ParserException
    {
        // The first node produced must be a Text node carrying the input verbatim.
        final String expected = "Hello world";
        final Text text = (Text)new Lexer (expected).nextNode ();
        assertEquals ("Text contents wrong", expected, text.getText ());
    }

    /**
     * Check that text containing a Unix line ending (LF) comes back unchanged.
     *
     * @throws ParserException If the lexer fails to produce a node.
     */
    public void testUnixEOL () throws ParserException
    {
        final String expected = "Hello\nworld";
        final Text text = (Text)new Lexer (expected).nextNode ();
        assertEquals ("Text contents wrong", expected, text.getText ());
    }

    /**
     * Check that text containing a Dos line ending (CR LF), and then a lone
     * carriage return, comes back unchanged.
     *
     * @throws ParserException If the lexer fails to produce a node.
     */
    public void testDosEOL () throws ParserException
    {
        // Cases are checked in order; the first mismatch fails the test.
        final String[] inputs =
        {
            "Hello\r\nworld",
            "Hello\rworld",
        };

        for (int i = 0; i < inputs.length; i++)
        {
            final Text text = (Text)new Lexer (inputs[i]).nextNode ();
            assertEquals ("Text contents wrong", inputs[i], text.getText ());
        }
    }

    /**
     * Test operation with line endings near the end of input.
*/
    public void testEOF_EOL () throws ParserException
    {
        // Each input ends with a different line terminator; the text node
        // must reproduce it exactly. Cases are checked in order.
        final String[] inputs =
        {
            "Hello world\n",
            "Hello world\r",
            "Hello world\r\n",
        };

        for (int i = 0; i < inputs.length; i++)
        {
            final Text text = (Text)new Lexer (inputs[i]).nextNode ();
            assertEquals ("Text contents wrong", inputs[i], text.getText ());
        }
    }

    /**
     * Check that a text node ends where the following markup begins.
     * Every text sample is paired with every kind of markup suffix, and the
     * first node must contain only the text sample.
     *
     * @throws ParserException If the lexer fails to produce a node.
     */
    public void testTagStops () throws ParserException
    {
        final String[] texts =
        {
            "Hello world",
            "Hello world\n",
            "Hello world\r\n",
            "Hello world\r",
        };
        final String[] markup =
        {
            "<head>",
            "</head>",
            "<%=head%>",
            "<?php ?>",
            "<!--head-->",
        };

        for (int t = 0; t < texts.length; t++)
        {
            final String expected = texts[t];
            for (int m = 0; m < markup.length; m++)
            {
                final Text text = (Text)new Lexer (expected + markup[m]).nextNode ();
                assertEquals ("Text contents wrong", expected, text.getText ());
            }
        }
    }

    /**
     * Test operation with only tags.
*/
    public void testPureTag () throws ParserException
    {
        final String head = "<head>";
        final String body = "<body>";

        // A lone tag must round-trip through toHtml().
        final Node single = new Lexer (head).nextNode ();
        assertEquals ("Tag contents wrong", head, single.toHtml ());

        // Two adjacent tags must come back as two nodes, in input order.
        final Lexer pair = new Lexer (head + body);
        final Node first = pair.nextNode ();
        assertEquals ("Tag contents wrong", head, first.toHtml ());
        final Node second = pair.nextNode ();
        assertEquals ("Tag contents wrong", body, second.toHtml ());
    }

    /**
     * Check that a tag with single-quoted, unquoted and double-quoted
     * attribute values, split over two lines, round-trips through toHtml().
     *
     * @throws ParserException If the lexer fails to produce a node.
     */
    public void testAttributedTag () throws ParserException
    {
        final String expected = "<head lang='en_US' dir=ltr\nprofile=\"http://htmlparser.sourceforge.org/dictionary.html\">";
        final Node tag = new Lexer (expected).nextNode ();
        assertEquals ("Tag contents wrong", expected, tag.toHtml ());
    }

    /**
     * Test operation with comments.
*/    public void testRemark () throws ParserException    {        String reference;        Lexer lexer;        Remark node;        String suffix;        reference = "<!-- This is a comment -->";        lexer = new Lexer (reference);        node = (Remark)lexer.nextNode ();        assertEquals ("Tag contents wrong", reference, node.toHtml ());        reference = "<!-- This is a comment --  >";        lexer = new Lexer (reference);        node = (Remark)lexer.nextNode ();        assertEquals ("Tag contents wrong", reference, node.toHtml ());        reference = "<!-- This is a\nmultiline comment -->";        lexer = new Lexer (reference);        node = (Remark)lexer.nextNode ();        assertEquals ("Tag contents wrong", reference, node.toHtml ());        suffix = "<head>";        reference = "<!-- This is a comment -->";        lexer = new Lexer (reference + suffix);        node = (Remark)lexer.nextNode ();        assertEquals ("Tag contents wrong", reference, node.toHtml ());        reference = "<!-- This is a comment --  >";        lexer = new Lexer (reference + suffix);        node = (Remark)lexer.nextNode ();        assertEquals ("Tag contents wrong", reference, node.toHtml ());        reference = "<!-- This is a\nmultiline comment -->";        lexer = new Lexer (reference + suffix);        node = (Remark)lexer.nextNode ();        assertEquals ("Tag contents wrong", reference, node.toHtml ());    }//    /**//     * Try a real page.//     *///    public void testReal () throws ParserException, IOException//    {//        Lexer lexer;//        Node node;////        URL url = new URL ("http://sourceforge.net/projects/htmlparser");//        lexer = new Lexer (url.openConnection ());//        while (null != (node = lexer.nextNode ()))//            System.out.println (node.toString ());//    }    /**     * Test the fidelity of the toHtml() method.     
*/
    public void testFidelity () throws ParserException, IOException
    {
        // Lex a live page and rebuild it by concatenating toHtml() of each node.
        final URL url = new URL ("http://sourceforge.net");
        final Lexer lexer = new Lexer (url.openConnection ());
        final StringBuffer rebuilt = new StringBuffer (80000);
        int expectedStart = 0;

        for (Node node = lexer.nextNode (); null != node; node = lexer.nextNode ())
        {
            final String html = node.toHtml ();

            // Each node must begin exactly where the previous one ended.
            if (expectedStart != node.getStartPosition ())
            {
                fail ("non-contiguous" + html);
            }
            rebuilt.append (html);
            expectedStart = node.getEndPosition ();

            // The rebuilt text must be as long as the node's end position.
            if (rebuilt.length () != expectedStart)
            {
                fail ("text length differed after encountering node " + html);
            }
        }

        // Compare the rebuilt text with the page text, character by character.
        final char[] ref = lexer.getPage ().getText ().toCharArray ();
        final char[] test = rebuilt.toString ().toCharArray ();
        assertEquals ("different amounts of text", ref.length, test.length);
        for (int i = 0; i < ref.length; i++)
        {
            if (ref[i] != test[i])
            {
                fail ("character differs at position " + i + ", expected <" + ref[i] + "> but was <" + test[i] + ">");
            }
        }
    }

//    /**
//     * Test the relative speed reading from a string parsing tags too.
//     */
//    public void testSpeedStringWithoutTags () throws ParserException, IOException
//    {
//        final String link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";
//        URL url;
//        URLConnection connection;
//        Source source;
//        StringBuffer buffer;
//        int i;
//        String html;
//
//        long old_total;
//        long new_total;
//        long begin;
//        long end;
//        StringReader reader;
//        NodeReader nodes;
//        Parser parser;
1 2 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -