📄 tagtest.java
字号:
"</div>"; createParser(testHTML,"http://www.cia.gov"); parseAndAssertNodeCount(1); // Check the tags assertType("node",Div.class,node[0]); Div div = (Div)node[0]; Tag fontTag = (Tag)div.children().nextNode(); // an alternate interpretation: assertEquals("Second tag should be corrected","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText()); assertEquals("Second tag should be corrected","font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText()); assertEquals("font sans-serif parameter","sans-serif",fontTag.getAttribute("SANS-SERIF")); // an alternate interpretation: assertEquals("font face parameter","Arial,helvetica,",table.get("FACE")); // another: assertEquals("font face parameter","Arial,\"helvetica,",table.get("FACE")); assertEquals("font face parameter","Arial,",fontTag.getAttribute("FACE")); } public void testToHTML() throws ParserException { String tag1 = "<MYTAG abcd\n"+ "efgh\n"+ "ijkl\n"+ "mnop>"; String testHTML = tag1 + "\n"+ "<TITLE>Hello</TITLE>\n"+ "<A HREF=\"Hello.html\">Hey</A>"; createParser(testHTML); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(9); // The node should be an Tag assertTrue("1st Node should be a Tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertStringEquals("toHTML()",tag1,tag.toHtml()); assertTrue("3rd Node should be a Tag",node[2] instanceof Tag); assertTrue("5th Node should be a Tag",node[6] instanceof Tag); tag = (Tag)node[2]; assertEquals("Raw String of the tag","<TITLE>",tag.toHtml()); tag = (Tag)node[6]; assertEquals("Raw String of the tag","<A HREF=\"Hello.html\">",tag.toHtml()); } /** * Test parseParameter method * Created by Kaarle Kaila (22 Oct 2001) * This test just wants the text in the element */ public void testWithoutParseParameter() throws ParserException{ Node node; String testHTML = "<A href=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaarle\">Kaarle's homepage</A><p>Paragraph</p>"; createParser(testHTML); NodeIterator en = parser.elements(); String result=""; while (en.hasMoreNodes()) { node = en.nextNode(); result += node.toHtml(); } assertStringEquals("Check collected contents to original", testHTML, result); } /** * Test parseParameter method * Created by Kaarle Kaila (09 Jan 2003) * This test just wants the text in the element */ public void testEmptyTagParseParameter() throws ParserException{ Node node; String testHTML = "<INPUT name=\"foo\" value=\"foobar\" type=\"text\" />"; createParser(testHTML); NodeIterator en = parser.elements(); String result=""; while (en.hasMoreNodes()) { node = en.nextNode(); result = node.toHtml(); } assertStringEquals("Check collected contents to original", testHTML, result); } public void testStyleSheetTag() throws ParserException{ String testHTML1 = new String("<link rel src=\"af.css\"/>"); createParser(testHTML1,"http://www.google.com/test/index.html"); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertEquals("StyleSheet Source","af.css",tag.getAttribute("src")); } /** * Bug report by Cedric Rosa, causing null pointer exceptions when encountering a broken tag, * and if this has no further lines to parse */ public void testBrokenTag() throws ParserException{ String testHTML1 = new String("<br"); createParser(testHTML1); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertEquals("Node contents","br",tag.getText()); } public void testTagInsideTag() throws ParserException { String testHTML = new String("<META name=\"Hello\" value=\"World </I>\">"); createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertEquals("Node contents","META name=\"Hello\" value=\"World </I>\"",tag.getText()); assertEquals("Meta Content","World </I>",tag.getAttribute("value")); } public void testIncorrectInvertedCommas() throws ParserException { String content = "DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F."; String author = "Author"; String guts = "META NAME=\"" + author + "\" CONTENT = \"" + content + "\""; String testHTML = "<" + guts + ">"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertStringEquals("Node contents",guts,tag.getText()); assertEquals("Meta Content",author,tag.getAttribute("NAME")); // // Big todo here: // This involves a change in the lexer state machine from // six states to probably 8, or perhaps a half dozen 'substates' // on state zero... // we shy away from this at the moment:// assertEquals("Meta Content",content,tag.getAttribute("CONTENT")); } public void testIncorrectInvertedCommas2() throws ParserException { String guts = "META NAME=\"Keywords\" CONTENT=Moscou, modernisation, politique urbaine, sp\u00e9cificit\u00e9s culturelles, municipalit\u00e9, Moscou, modernisation, urban politics, cultural specificities, municipality\""; String testHTML = "<" + guts + ">"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertStringEquals("Node contents",guts,tag.getText()); } public void testIncorrectInvertedCommas3() throws ParserException { String testHTML = new String("<meta name=\"description\" content=\"Une base de donn\u00e9es sur les th\u00e8ses de g\"ographie soutenues en France \">"); createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertEquals("Node contents","meta name=\"description\" content=\"Une base de donn\u00e9es sur les th\u00e8ses de g\"ographie soutenues en France \"",tag.getText()); } /** * Ignore empty tags. */ public void testEmptyTag() throws ParserException { String testHTML = "<html><body><>text</body></html>"; createParser(testHTML); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(5); assertTrue("Third node should be a string node",node[2] instanceof Text); Text stringNode = (Text)node[2]; assertEquals("Third node has incorrect text","<>text",stringNode.getText()); } /** * Ignore empty tags. */ public void testEmptyTag2() throws ParserException { String testHTML = "<html><body>text<></body></html>"; createParser(testHTML); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(5); assertTrue("Third node should be a string node",node[2] instanceof Text); Text stringNode = (Text)node[2]; assertEquals("Third node has incorrect text","text<>",stringNode.getText()); } /** * Ignore empty tags. */ public void testEmptyTag3() throws ParserException { String testHTML = "<html><body>text<>text</body></html>"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Only node should be an HTML node",node[0] instanceof Html); Html html = (Html)node[0]; assertTrue("HTML node should have one child",1 == html.getChildCount ()); assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag); BodyTag body = (BodyTag)html.getChild(0); assertTrue("BODY node should have one child",1 == body.getChildCount ()); assertTrue("Only node should be a string node",body.getChild(0) instanceof Text); Text stringNode = (Text)body.getChild(0); assertEquals("Third node has incorrect text","text<>text",stringNode.getText()); } /** * Ignore empty tags. */ public void testEmptyTag4() throws ParserException { String testHTML = "<html><body>text\n<>text</body></html>"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Only node should be an HTML node",node[0] instanceof Html); Html html = (Html)node[0]; assertTrue("HTML node should have one child",1 == html.getChildCount ()); assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag); BodyTag body = (BodyTag)html.getChild(0); assertTrue("BODY node should have one child",1 == body.getChildCount ()); assertTrue("Only node should be a string node",body.getChild(0) instanceof Text); Text stringNode = (Text)body.getChild(0); String actual = stringNode.getText(); assertEquals("Third node has incorrect text","text\n<>text",actual); } /** * Ignore empty tags. */ public void testEmptyTag5() throws ParserException { String testHTML = "<html><body>text<\n>text</body></html>"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Only node should be an HTML node",node[0] instanceof Html); Html html = (Html)node[0]; assertTrue("HTML node should have one child",1 == html.getChildCount ()); assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag); BodyTag body = (BodyTag)html.getChild(0); assertTrue("BODY node should have one child",1 == body.getChildCount ()); assertTrue("Only node should be a string node",body.getChild(0) instanceof Text); Text stringNode = (Text)body.getChild(0); String actual = stringNode.getText(); assertEquals("Third node has incorrect text","text<\n>text",actual); } /** * Ignore empty tags. */ public void testEmptyTag6() throws ParserException { String testHTML = "<html><body>text<>\ntext</body></html>"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Only node should be an HTML node",node[0] instanceof Html); Html html = (Html)node[0]; assertTrue("HTML node should have one child",1 == html.getChildCount ()); assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag); BodyTag body = (BodyTag)html.getChild(0); assertTrue("BODY node should have one child",1 == body.getChildCount ()); assertTrue("Only node should be a string node",body.getChild(0) instanceof Text); Text stringNode = (Text)body.getChild(0); String actual = stringNode.getText(); assertEquals("Third node has incorrect text","text<>\ntext",actual); } public void testAttributesReconstruction() throws ParserException { String expectedHTML = "<TEXTAREA name=\"JohnDoe\" >"; String testHTML = expectedHTML + "</TEXTAREA>"; createParser(testHTML); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(2); assertTrue("First node should be an HTMLtag",node[0] instanceof Tag); Tag htmlTag = (Tag)node[0]; assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); } public void testIgnoreState() throws ParserException { String testHTML = "<A \n"+ "HREF=\"/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html\">20020702 Report 1</A>"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; String href = tag.getAttribute("HREF"); assertStringEquals("Resolved Link","/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html",href); } /** * See bug #726913 toHtml() method incomplete */ public void testSetText() throws ParserException { String testHTML = "<LABEL ID=\"JohnDoe\">John Doe</LABEL>"; createParser(testHTML); parseAndAssertNodeCount(1); org.htmlparser.tags.LabelTag htmlTag = (org.htmlparser.tags.LabelTag)node[0]; String expectedHTML = "<LABEL ID=\"JohnDoe\">John Doe</LABEL>"; assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); assertStringEquals("Expected HTML","John Doe",htmlTag.getLabel()); ((org.htmlparser.Text)((org.htmlparser.tags.CompositeTag)htmlTag).getChild(0)).setText("Jane Doe"); expectedHTML = "<LABEL ID=\"JohnDoe\">Jane Doe</LABEL>"; assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); assertStringEquals("Expected HTML","Jane Doe",htmlTag.getLabel()); } /** * From oyoaha */ public void testTabText () throws ParserException { String testHTML = "<a\thref=\"http://cbc.ca\">"; createParser (testHTML); parseAndAssertNodeCount (1); assertTrue("Node should be a LinkTag", node[0] instanceof LinkTag); LinkTag tag = (LinkTag)node[0]; String href = tag.getAttribute ("HREF"); assertStringEquals("Resolved Link","http://cbc.ca", href); } /** * See bug #741026 registerScanners() mangles output HTML badly. */ public void testHTMLOutputOfDifficultLinksWithRegisterScanners () throws ParserException { // straight out of a real world example String html = "<a href=http://www.google.com/webhp?hl=en>"; createParser (html); String temp = null; for (NodeIterator e = parser.elements (); e.hasMoreNodes ();) { Node newNode = e.nextNode (); // Get the next HTML Node temp = newNode.toHtml(); } assertNotNull ("No nodes", temp); assertStringEquals ("Incorrect HTML output: ", html + "</a>", temp); } /** * See bug #740411 setParsed() has no effect on output. */ public void testParameterChange() throws ParserException { createParser("<TABLE BORDER=0>"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); // the node should be a Tag assertTrue("Node should be a Tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertEquals("Initial text should be","TABLE BORDER=0",tag.getText ()); tag.setAttribute ("BORDER","\"1\""); assertEquals("HTML should be","<TABLE BORDER=\"1\">", tag.toHtml ()); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -