📄 parsertestcase.java

📁 本程序用于对页面信息进行提取并分析
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML// http://sourceforge.org/projects/htmlparser// Copyright (C) 2004 Somik Raha//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v $// $Author: derrickoswald $// $Date: 2004/07/31 16:42:33 $// $Revision: 1.52 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.tests;import java.util.Enumeration;import java.util.Properties;import java.util.Vector;import junit.framework.TestCase;import org.htmlparser.Attribute;import org.htmlparser.Node;import org.htmlparser.Parser;import org.htmlparser.Tag;import org.htmlparser.Text;import org.htmlparser.lexer.Lexer;import org.htmlparser.lexer.Page;import org.htmlparser.nodes.TagNode;import org.htmlparser.tags.FormTag;import org.htmlparser.tags.InputTag;import org.htmlparser.util.DefaultParserFeedback;import org.htmlparser.util.NodeIterator;import org.htmlparser.util.ParserException;import org.htmlparser.util.ParserUtils;public class ParserTestCase extends TestCase {    static boolean mCaseInsensitiveComparisons = false;    protected Parser parser;    protected Node node [];    protected int nodeCount;    protected Lexer mLexer;    public ParserTestCase(String name) {        super(name);    }    protected void parse(String response) throws ParserException {        createParser(response,10000);        parseNodes();    }    protected void createParser(String inputHTML) {        mLexer =  new Lexer (new Page (inputHTML));        parser = new Parser(mLexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));        node = new Node[40];    }    protected void createParser(String inputHTML,int numNodes)    {        Lexer lexer = new Lexer (inputHTML);        parser = new Parser (lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));        node = new Node[numNodes];    }    protected void createParser(String inputHTML, String url) {        Lexer lexer = new Lexer (inputHTML);        lexer.getPage ().setUrl (url);        parser = new Parser (lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));        node = new Node[40];    }    protected void createParser(String inputHTML, String url,int numNodes) {        Lexer lexer = new Lexer (inputHTML);        lexer.getPage ().setUrl (url);        parser = new Parser (lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));        node = new Node[numNodes];    }    public Parser getParser ()    {        return (parser);    }    public void setParser (Parser parser)    {        this.parser = parser;    }    public void assertStringEquals(String message, String expected,                                      String actual) {        String mismatchInfo = "";        if (expected.length() < actual.length()) {            mismatchInfo = "\n\nACTUAL result has "+(actual.length()-expected.length())+" extra characters at the end. They are :";            int limit = Math.min (expected.length() + 10, actual.length());            for (int i = expected.length(); i < limit; i++)                mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) actual.charAt(i));            if (limit != actual.length())                mismatchInfo += "\netc.";        } else if(expected.length() > actual.length()) {            mismatchInfo = "\n\nEXPECTED result has "+(expected.length()-actual.length())+" extra characters at the end. They are :";            int limit = Math.min (actual.length() + 10, expected.length());            for (int i = actual.length(); i < expected.length(); i++)                mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) expected.charAt(i));            if (limit != expected.length ())                mismatchInfo += "\netc.";        }        for (int i = 0; i < expected.length(); i++) {            if (                    (expected.length() != actual.length() &&                        (                            i >= (expected.length()-1 ) ||                            i >= (actual.length()-1 )                        )                    ) ||                    (mCaseInsensitiveComparisons && Character.toUpperCase (actual.charAt(i)) != Character.toUpperCase (expected.charAt(i))) ||                    (!mCaseInsensitiveComparisons && (actual.charAt(i) != expected.charAt(i)))                ) {                    StringBuffer errorMsg = new StringBuffer();                    errorMsg.append(                        message +mismatchInfo + " \nMismatch of strings at char posn " + i +                        " \n\nString Expected upto mismatch = " +                        expected.substring(0, i) +                        " \n\nString Actual upto mismatch = " +                        actual.substring(0, i)                    );                    if (i<expected.length())                       errorMsg.append(                            " \n\nString Expected MISMATCH CHARACTER = "+                            expected.charAt(i) + ", code = " +                            (int) expected.charAt(i)                        );                    if (i<actual.length())                        errorMsg.append(                            " \n\nString Actual MISMATCH CHARACTER = " +                            actual.charAt(i) + ", code = " +                            (int) actual.charAt(i)                        );                    errorMsg.append(                        " \n\n**** COMPLETE STRING EXPECTED ****\n" +                        expected +                        " \n\n**** COMPLETE STRING ACTUAL***\n" + actual                    );                    System.out.println ("string differs, expected \"" + expected + "\", actual \"" + actual + "\"");                    failWithMessage(errorMsg.toString());            }        }    }    public void failWithMessage(String message) {    	fail(message);	}	public void parseNodes() throws ParserException{        nodeCount = 0;        for (NodeIterator e = parser.elements();e.hasMoreNodes();)        {            node[nodeCount++] = e.nextNode();        }    }    public void assertNodeCount(int nodeCountExpected) {        StringBuffer msg = new StringBuffer();        for (int i=0;i<nodeCount;i++) {            msg.append(node[i].getClass().getName());            msg.append("-->\n").append(node[i].toHtml()).append("\n");        }        if (nodeCountExpected != nodeCount)            System.out.println ("node count differs, expected " + nodeCountExpected + ", actual " + nodeCount);        assertEquals("Number of nodes parsed didn't match, nodes found were :\n"+msg.toString(),nodeCountExpected,nodeCount);    }    public void parseAndAssertNodeCount(int nodeCountExpected) throws ParserException {        parseNodes();        assertNodeCount(nodeCountExpected);    }    public void assertSameType(String displayMessage, Node expected, Node actual) {        String expectedNodeName = expected.getClass().getName();        String actualNodeName = actual.getClass().getName();        displayMessage =            "The types did not match: Expected "+            expectedNodeName+" \nbut was "+            actualNodeName+"\nEXPECTED XML:"+expected.toHtml()+"\n"+            "ACTUAL XML:"+actual.toHtml()+displayMessage;        assertStringEquals(displayMessage, expectedNodeName, actualNodeName);    }    public void assertTagEquals(String displayMessage, Node expected, Node actual) {        if (expected instanceof Tag) {            Tag expectedTag = (Tag)expected;            Tag actualTag   = (Tag)actual;            assertTagNameMatches(displayMessage, expectedTag, actualTag);            assertAttributesMatch(displayMessage, expectedTag, actualTag);        }    }    private void assertTagNameMatches(        String displayMessage,        Tag nextExpectedTag,        Tag nextActualTag) {        String expectedTagName = nextExpectedTag.getTagName();        String actualTagName = nextActualTag.getTagName();        displayMessage = "The tag names did not match: Expected "+expectedTagName+" \nbut was "+actualTagName+displayMessage;        assertStringEquals(displayMessage, expectedTagName, actualTagName);    }    public void assertXmlEquals(String displayMessage, String expected, String actual) throws Exception    {        Node nextExpectedNode;        Node nextActualNode;        Tag tag1;        Tag tag2;        expected = removeEscapeCharacters(expected);        actual   = removeEscapeCharacters(actual);        Parser expectedParser = Parser.createParser(expected, null);        Parser resultParser   = Parser.createParser(actual, null);        NodeIterator expectedIterator = expectedParser.elements();        NodeIterator actualIterator =  resultParser.elements();        displayMessage = createGenericFailureMessage(displayMessage, expected, actual);        nextExpectedNode = null;        nextActualNode = null;        tag1 = null;        tag2 = null;        do {            if (null != tag1)                nextExpectedNode = tag1;            else                nextExpectedNode = getNextNodeUsing (expectedIterator);            if (null != tag2)                nextActualNode = tag2;            else                nextActualNode = getNextNodeUsing (actualIterator);            assertNotNull (nextActualNode);            tag1 = fixIfXmlEndTag (expectedParser.getLexer ().getPage (), nextExpectedNode);            tag2 = fixIfXmlEndTag (resultParser.getLexer ().getPage (), nextActualNode);            assertStringValueMatches(                displayMessage,                nextExpectedNode,                nextActualNode            );            assertSameType(displayMessage, nextExpectedNode, nextActualNode);            assertTagEquals(displayMessage, nextExpectedNode, nextActualNode);        }        while (expectedIterator.hasMoreNodes() || (null != tag1));        assertActualXmlHasNoMoreNodes(displayMessage, actualIterator);    }    private Node getNextNodeUsing(NodeIterator nodeIterator)        throws ParserException {        Node nextNode;        String text=null;        do {            nextNode = nodeIterator.nextNode();            if (nextNode instanceof Text) {                text = nextNode.toPlainTextString().trim();            } else text = null;        }        while (text!=null && text.length()==0);        return nextNode;    }    private void assertStringValueMatches(
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -