📄 parsertest.java
字号:
NodeClassFilter filter = new NodeClassFilter (LinkTag.class);
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
            e.nextNode().collectInto(collectionList,filter);
        assertEquals("Size of collection vector should be 11",11,collectionList.size());
        // All items in collection vector should be links
        for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();)
        {
            Node node = e.nextNode();
            assertTrue("Only links should have been parsed",node instanceof LinkTag);
        }
    }

    /**
     * Collects every node matching the tag name "IMG" from a sample page and
     * checks that exactly five nodes are found and that each is an ImageTag.
     * @throws ParserException if parsing the sample page fails.
     */
    public void testImageCollection() throws ParserException
    {
        createParser(
            "<html>\n"+
            "<head>\n"+
            "<meta name=\"generator\" content=\"Created Using Yahoo! PageBuilder 2.60.24\">\n"+
            "</head>\n"+
            "<body bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#FF0000\" text=\"#000000\"\n"+
            " onLoad=\"window.onresize=new Function('if (navigator.appVersion==\'Netscape\') history.go(0);');\">\n"+
            "<div id=\"layer0\" style=\"position:absolute;left:218;top:40;width:240;height:26;\">\n"+
            "<table width=240 height=26 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><b><font size=\"+2\"><span style=\"font-size:24\">NISHI-HONGWAN-JI</span></font></b></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer1\" style=\"position:absolute;left:75;top:88;width:542;height:83;\">\n"+
            "<table width=542 height=83 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><span style=\"font-size:14\">The Nihi Hongwanj-ji temple is very traditional, very old, and very beautiful. This is the place that we stayed on our first night in Kyoto. We then attended the morning prayer ceremony, at 6:30 am. Staying here costed us 7,500 yen, which was inclusive of dinner and breakfast, and usage of the o-furo (public bath). Felt more like a luxury hotel than a temple.</span></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer2\" style=\"position:absolute;left:144;top:287;width:128;height:96;\">\n"+
            "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a href=\"nishi-hongwanji1.html\"><img height=96 width=128 src=\"nishi-hongwanji1-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer3\" style=\"position:absolute;left:415;top:285;width:128;height:96;\">\n"+
            "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a href=\"nishi-hongwanji3.html\"><img height=96 width=128 src=\"nishi-hongwanji2-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer4\" style=\"position:absolute;left:414;top:182;width:128;height:96;\">\n"+
            "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a href=\"higashi-hongwanji.html\"><img height=96 width=128 src=\"higashi-hongwanji-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer5\" style=\"position:absolute;left:78;top:396;width:530;height:49;\">\n"+
            "<table width=530 height=49 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><span style=\"font-size:14\">Click on the pictures to see the full-sized versions. The picture at the top right corner is taken in Higashi-Hongwanji. Nishi means west, and Higashi means east. These two temples are adjacent to each other and represent two different Buddhist sects.</span></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer6\" style=\"position:absolute;left:143;top:180;width:128;height:102;\">\n"+
            "<table width=128 height=102 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a href=\"nishi-hongwanji4.html\"><img height=102 width=128 src=\"nishi-hongwanji4-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer7\" style=\"position:absolute;left:280;top:235;width:124;height:99;\">\n"+
            "<table width=124 height=99 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a href=\"nishi-hongwanji-lodging.html\"><img height=99 width=124 src=\"nishi-hongwanji-lodging-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "</body>\n"+
            "</html>");
        NodeList collectionList = new NodeList();
        TagNameFilter filter = new TagNameFilter ("IMG");
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
            e.nextNode().collectInto(collectionList,filter);
        assertEquals("Size of collection vector should be 5",5,collectionList.size());
        // All items in collection vector should be images
        for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();)
        {
            Node node = e.nextNode();
            assertTrue("Only images should have been parsed",node instanceof ImageTag);
        }
    }

    /**
     * See bug #728241 OutOfMemory error/ Infinite loop
     * Iterates over every top-level node of the sample page; the test passes
     * when the iteration simply runs to completion.
     */
    public void testOutOfMemory () throws Exception
    {
        createParser (
            "<html><head></head>\n" +
            "<body>\n" +
            "<table>\n" +
            "<tr>\n" +
            " <td><img src=\"foo.jpg\" alt=\"f's b\"><font\n" +
            " size=1>blah</font>\n" +
            "</td>\n" +
            "</tr>\n" +
            "</table>\n" +
            "</body></html>\n");
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
        {
            e.nextNode();
        }
    }

    /**
     * See bug #729368 Embedded quote and split tag
     * Expects 21 nodes in total, with the node at index 10 being a tag whose
     * ALT attribute keeps its embedded single quote.
     */
    public void testEmbeddedQuoteSplit () throws Exception
    {
        createParser (
            "<html><head></head>\n" +
            "<body>\n" +
            "<table>\n" +
            "<tr><td><img src=\"x\" alt=\"f's b\"><font\n" +
            "size=1>blah</font></td></tr>\n" +
            "</table>\n" +
            "</body></html>");
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        int i = 0;
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
        {
            Node node = e.nextNode();
            if (10 == i)
            {
                assertTrue ("not a tag", node instanceof Tag);
                // assertEquals reports a missing attribute as a test failure
                // rather than dying with a NullPointerException.
                assertEquals ("ALT attribute incorrect", "f's b", ((Tag)node).getAttribute ("ALT"));
            }
            i++;
        }
        assertEquals("Expected nodes",21,i);
    }

    /**
     * See bug #826764 ParserException occurs only when using setInputHTML() instea[d ...]
     * (the bug title is truncated in the original comment).
     * Writes a small page to a scratch file in the working directory, reads
     * it back into a string, feeds that string to setInputHTML() and expects
     * three link tags to be extracted. The scratch file is always deleted.
     */
    public void testSetInputHTML () throws Exception
    {
        String html;
        String path;
        File file;
        PrintWriter out;
        Node[] nodes;

        html = "<html></html>";
        createParser (html);
        path = System.getProperty ("user.dir");
        if (!path.endsWith (File.separator))
            path += File.separator;
        file = new File (path + "delete_me.html");
        try
        {
            out = new PrintWriter (new FileWriter (file));
            try
            {
                out.print ("<html>\r\n");
                out.print ("<head>\r\n");
                out.print ("<!-- BEGIN TYPE -->\r\n");
                out.print ("<!-- NAVIGATION -->\r\n");
                out.print ("<!-- END TYPE -->\r\n");
                out.print ("<!-- BEGIN TITLE -->\r\n");
                out.print ("<title>Einstiegsseite</title>\r\n");
                out.print ("<!-- END TITLE -->\r\n");
                out.print ("</head>\r\n");
                out.print ("<body>\r\n");
                out.print ("<ul>\r\n");
                out.print ("<li>\r\n");
                out.print ("<!-- BEGIN ITEM -->\r\n");
                out.print ("<!-- BEGIN REF -->\r\n");
                out.print ("<a href=\"kapitel1/index.html\">\r\n");
                out.print ("<!-- END REF -->\r\n");
                out.print ("<!-- BEGIN REFTITLE -->\r\n");
                out.print ("Kapitel 1\r\n");
                out.print ("<!-- END REFTITLE -->\r\n");
                out.print ("</a>\r\n");
                out.print ("<!-- END ITEM -->\r\n");
                out.print ("</li>\r\n");
                out.print ("<li>\r\n");
                out.print ("<!-- BEGIN ITEM -->\r\n");
                out.print ("<!-- BEGIN REF -->\r\n");
                out.print ("<a href=\"kapitel2/index.html\">\r\n");
                out.print ("<!-- END REF -->\r\n");
                out.print ("<!-- BEGIN REFTITLE -->\r\n");
                out.print ("Kapitel 2\r\n");
                out.print ("<!-- END REFTITLE -->\r\n");
                out.print ("</a>\r\n");
                out.print ("<!-- END ITEM -->\r\n");
                out.print ("</li>\r\n");
                out.print ("<li>\r\n");
                out.print ("<!-- BEGIN ITEM -->\r\n");
                out.print ("<!-- BEGIN REF -->\r\n");
                out.print ("<a href=\"kapitel3/index.html\">\r\n");
                out.print ("<!-- END REF -->\r\n");
                out.print ("<!-- BEGIN REFTITLE -->\r\n");
                out.print ("Kapitel 3\r\n");
                out.print ("<!-- END REFTITLE -->\r\n");
                out.print ("</a>\r\n");
                out.print ("<!-- END ITEM -->\r\n");
                out.print ("</li>\r\n");
                out.print ("</ul>\r\n");
                out.print ("</body>\r\n");
                out.print ("</html>");
            }
            finally
            {
                // Release the writer even if something goes wrong mid-write.
                out.close ();
            }
            byte[] buffer = new byte[(int)file.length ()];
            DataInputStream stream = new DataInputStream (
                new BufferedInputStream (new FileInputStream (file)));
            try
            {
                stream.readFully (buffer);
            }
            finally
            {
                // The stream must be closed before the file is deleted below;
                // an open handle can make File.delete() fail on some platforms.
                stream.close ();
            }
            html = new String (buffer);
            try
            {
                parser.setInputHTML (html);
                nodes = parser.extractAllNodesThatAre (LinkTag.class);
            }
            catch (ParserException e)
            {
                e.printStackTrace ();
                nodes = new Node[0];
            }
            assertEquals ("node count", 3, nodes.length);
        }
        catch (Exception e)
        {
            fail (e.toString ());
        }
        finally
        {
            file.delete ();
        }
    }

    /**
     * Test reproducing a java.lang.StackOverflowError.
     * Parses a single XML-style (self-closing) tag and checks that its
     * toString() output contains the tag contents.
     */
    public void testXMLTypeToString () throws Exception
    {
        String guts;
        String output;

        guts = "TD width=\"69\"/";
        createParser ("<" + guts + ">");
        parseAndAssertNodeCount (1);
        // this was where StackOverflow was thrown
        output = node[0].toString ();
        assertTrue ("bad toString()", -1 != output.indexOf (guts));
    }

    /**
     * See bug #883664 toUpperCase on tag names and attributes depends on locale
     * Parses a title tag with the default locale switched to Turkish and
     * checks that toHtml() reproduces the input; the original default locale
     * is always restored afterwards.
     */
    public void testDifferentLocale () throws Exception
    {
        String html;
        Locale original;

        html = "<title>This is supposedly Turkish.</title>";
        original = Locale.getDefault ();
        try
        {
            Locale.setDefault (new Locale ("tr")); // turkish
            createParser (html);
            parseAndAssertNodeCount (1);
            assertStringEquals ("html", html, node[0].toHtml ());
        }
        finally
        {
            Locale.setDefault (original);
        }
    }

    /**
     * See bug #900128 RemarkNode.setText() does not set Text
     * Text-node case: after setText() on the single Text child of the body,
     * the child's toHtml(), getText() and toString() as well as the parent's
     * toHtml() must all reflect the new text.
     */
    public void testSetStringText () throws Exception
    {
        String text;
        String html;
        String newtext;
        String newhtml;
        Node txt;

        text = "This is just text.";
        html = "<body>" + text + "</body>";
        newtext = "This is different text.";
        newhtml = "<body>" + newtext + "</body>";
        createParser (html);
        parseAndAssertNodeCount (1);
        assertStringEquals ("html wrong", html, node[0].toHtml ());
        assertEquals ("wrong number of children", 1, node[0].getChildren ().size ());
        assertTrue ("string node expected", node[0].getChildren ().elementAt (0) instanceof Text);
        txt = node[0].getChildren ().elementAt (0);
        assertStringEquals ("string html wrong", text, txt.toHtml ());
        assertStringEquals ("string contents wrong", text, txt.getText ());
        assertTrue ("toString wrong", txt.toString ().endsWith (text));
        txt.setText (newtext);
        assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
        assertStringEquals ("new string html wrong", newtext, txt.toHtml ());
        assertStringEquals ("new string contents wrong", newtext, txt.getText ());
        assertTrue ("toString wrong", txt.toString ().endsWith (newtext));
    }

    /**
     * See bug #900128 RemarkNode.setText() does not set Text
     * Remark case: setText() is exercised twice on the single Remark child of
     * the body, first with the bare remark contents and then with the
     * contents wrapped in comment delimiters; both calls are expected to
     * yield the same HTML and the same getText() value.
     */
    public void testSetRemarkText () throws Exception
    {
        String text;
        String remark;
        String html;
        String newtext;
        String newremark;
        String newhtml;
        Node rem;

        text = " This is a remark. ";
        remark = "<!--" + text + "-->";
        html = "<body>" + remark + "</body>";
        newtext = " This is a different remark. ";
        newremark = "<!--" + newtext + "-->";
        newhtml = "<body>" + newremark + "</body>";
        createParser (html);
        parseAndAssertNodeCount (1);
        assertStringEquals ("html wrong", html, node[0].toHtml ());
        assertEquals ("wrong number of children", 1, node[0].getChildren ().size ());
        assertTrue ("remark node expected", node[0].getChildren ().elementAt (0) instanceof Remark);
        rem = node[0].getChildren ().elementAt (0);
        assertStringEquals ("remark html wrong", remark, rem.toHtml ());
        assertStringEquals ("remark contents wrong", text, rem.getText ());
        assertTrue ("toString wrong", rem.toString ().endsWith (text));
        // First form: set only the remark contents.
        rem.setText (newtext);
        assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
        assertStringEquals ("new remark html wrong", newremark, rem.toHtml ());
        assertStringEquals ("new remark contents wrong", newtext, rem.getText ());
        assertTrue ("toString wrong", rem.toString ().endsWith (newtext));
        // Second form: set the contents including the comment delimiters.
        rem.setText (newremark);
        assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
        assertStringEquals ("new remark html wrong", newremark, rem.toHtml ());
        assertStringEquals ("new remark contents wrong", newtext, rem.getText ());
        assertTrue ("toString wrong", rem.toString ().endsWith (newtext));
    }

    /**
     * Constructs a parser from a URL containing spaces and checks that
     * getURL() reports the URL with each space written as %20.
     * @throws ParserException if the parser cannot be constructed.
     */
    public void testFixSpaces () throws ParserException
    {
        String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html";
        parser = new Parser (url);
        assertEquals("Expected","http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html", parser.getURL ());
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -