⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uurifactorytest.java

📁 最强的爬虫工程
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
		assertTrue("Not equal " + uuri.toString(),				uuri.toString().equals(tgtUri));	}		/**	 * Test space plus encoding ([ 1010966 ] crawl.log has URIs with spaces in them).	 * See <a href="http://sourceforge.net/tracker/index.php?func=detail&aid=1010966&group_id=73833&atid=539099">[ 1010966 ] crawl.log has URIs with spaces in them</a>.	 * @throws URIException	 */	public final void testSpaceDoubleEncoding() throws URIException {		final String uri = "http://www.brook.edu/i.html? %20taxonomy=Politics";		final String encodedUri =			"http://www.brook.edu/i.html?%20%20taxonomy=Politics";		UURI uuri = UURIFactory.getInstance(uri, "ISO-8859-1");		assertTrue("Not equal " + uuri.toString(),				uuri.toString().equals(encodedUri));	}		/**	 * Test for doubly-encoded sequences.	 * See <a href="https://sourceforge.net/tracker/index.php?func=detail&aid=966219&group_id=73833&atid=539099">[ 966219 ] UURI doubly-encodes %XX sequences</a>.	 * @throws URIException	 */	public final void testDoubleEncoding() throws URIException {		final char ae = '\u00E6';		final String uri = "http://archive.org/DIR WITH SPACES/home" +		    ae + ".html";		final String encodedUri =			"http://archive.org/DIR%20WITH%20SPACES/home%E6.html";		UURI uuri = UURIFactory.getInstance(uri, "ISO-8859-1");		assertEquals("single encoding", encodedUri, uuri.toString());		// Dbl-encodes.		uuri = UURIFactory.getInstance(uuri.toString(), "ISO-8859-1");		uuri = UURIFactory.getInstance(uuri.toString(), "ISO-8859-1");		assertEquals("double encoding", encodedUri, uuri.toString());		// Do default utf-8 test.		uuri = UURIFactory.getInstance(uri);		final String encodedUtf8Uri =			"http://archive.org/DIR%20WITH%20SPACES/home%C3%A6.html";		assertEquals("Not equal utf8", encodedUtf8Uri, uuri.toString());      		// Now dbl-encode.		uuri = UURIFactory.getInstance(uuri.toString());		uuri = UURIFactory.getInstance(uuri.toString());		assertEquals("Not equal (dbl-encoding) utf8", encodedUtf8Uri, uuri.toString());	}		/**	 * Test for syntax errors stop page parsing.	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=788219&group_id=73833&atid=539099">[ 788219 ] URI Syntax Errors stop page parsing</a>	 * @throws URIException	 */	public final void testThreeSlashes() throws URIException {		UURI goodURI = UURIFactory.		getInstance("http://lcweb.loc.gov/rr/goodtwo.html");		String uuri = "http:///lcweb.loc.gov/rr/goodtwo.html";		UURI rewrittenURI = UURIFactory.getInstance(uuri);		assertTrue("Not equal " + goodURI + ", " + uuri,				goodURI.toString().equals(rewrittenURI.toString()));		uuri = "http:////lcweb.loc.gov/rr/goodtwo.html";		rewrittenURI = UURIFactory.getInstance(uuri);		assertTrue("Not equal " + goodURI + ", " + uuri,				goodURI.toString().equals(rewrittenURI.toString()));		// Check https.		goodURI = UURIFactory.		getInstance("https://lcweb.loc.gov/rr/goodtwo.html");		uuri = "https:////lcweb.loc.gov/rr/goodtwo.html";		rewrittenURI = UURIFactory.getInstance(uuri);		assertTrue("Not equal " + goodURI + ", " + uuri,				goodURI.toString().equals(rewrittenURI.toString()));	}		public final void testNoScheme() {		boolean expectedException = false;		String uuri = "www.loc.gov/rr/european/egw/polishex.html";		try {			UURIFactory.getInstance(uuri);		} catch (URIException e) {			// Expected exception.			expectedException = true;		}		assertTrue("Didn't get expected exception: " + uuri, 				expectedException); 	}		public final void testRelative() throws URIException {		UURI uuriTgt = UURIFactory.		getInstance("http://archive.org:83/home.html");		UURI uri = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uuri = UURIFactory.		getInstance(uri, "/home.html");		assertTrue("Not equal",				uuriTgt.toString().equals(uuri.toString()));	}		/**	 * Test that an empty uuri does the right thing -- that we get back the	 * base.	 *	 * @throws URIException	 */	public final void testRelativeEmpty() throws URIException {		UURI uuriTgt = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uri = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uuri = UURIFactory.		getInstance(uri, "");		assertTrue("Empty length don't work",				uuriTgt.toString().equals(uuri.toString()));	}		public final void testAbsolute() throws URIException {		UURI uuriTgt = UURIFactory.		getInstance("http://archive.org:83/home.html");		UURI uri = UURIFactory.		getInstance("http://archive.org:83/one/two/three.html");		UURI uuri = UURIFactory.		getInstance(uri, "http://archive.org:83/home.html");		assertTrue("Not equal",				uuriTgt.toString().equals(uuri.toString()));	}		/**	 * Test for [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts).	 * @see <a href="https://sourceforge.net/tracker/?func=detail&atid=539099&aid=962892&group_id=73833">[ 962892 ] UURI accepting/creating unUsable URIs (bad hosts)</a>	 */	public final void testHostWithLessThan() {		checkExceptionOnIllegalDomainlabel("http://www.betamobile.com</A");		checkExceptionOnIllegalDomainlabel(		"http://C|/unzipped/426/spacer.gif");		checkExceptionOnIllegalDomainlabel("http://www.lycos.co.uk\"/l/b/\"");	}    		/**	 * Test for [ 1012520 ] UURI.length() &gt; 2k.	 * @throws URIException	 * @see <a href="http://sourceforge.net/tracker/index.php?func=detail&aid=1012520&group_id=73833&atid=539099">[ 1012520 ] UURI.length() &gt; 2k</a>	 */	public final void test2kURI() throws URIException {		final StringBuffer buffer = new StringBuffer("http://a.b");		final String subPath = "/123456789";		for (int i = 0; i < 207; i++) {			buffer.append(subPath);		}		// String should be 2080 characters long.  Legal.		UURIFactory.getInstance(buffer.toString());		boolean gotException = false;		// Add ten more characters and make size illegal.		buffer.append(subPath);		try {			UURIFactory.getInstance(buffer.toString()); 		} catch (URIException e) {			gotException = true;		}		assertTrue("No expected exception complaining about long URI",				gotException);	} 		private void checkExceptionOnIllegalDomainlabel(String uuri) {		boolean expectedException = false;        try {			UURIFactory.getInstance(uuri);		} catch (URIException e) {			// Expected exception.			expectedException = true;		}		assertTrue("Didn't get expected exception: " + uuri, 				expectedException); 	}		/**	 * Test for doing separate DNS lookup for same host	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=788277&group_id=73833&atid=539099">[ 788277 ] Doing separate DNS lookup for same host</a>	 * @throws URIException	 */	public final void testHostWithPeriod() throws URIException {		UURI uuri1 = UURIFactory.		getInstance("http://www.loc.gov./index.html");		UURI uuri2 = UURIFactory.		getInstance("http://www.loc.gov/index.html");		assertEquals("Failed equating hosts with dot",				uuri1.getHost(), uuri2.getHost());	}		/**	 * Test for NPE in java.net.URI.encode	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=874220&group_id=73833&atid=539099">[ 874220 ] NPE in java.net.URI.encode</a>	 * @throws URIException	 */	public final void testHostEncodedChars() throws URIException {		String s = "http://g.msn.co.kr/0nwkokr0/00/19??" +		"PS=10274&NC=10009&CE=42&CP=949&HL=" +		"&#65533;&#65533;&#65533;?&#65533;&#65533;";		assertNotNull("Encoded chars " + s, 				UURIFactory.getInstance(s));	}		/**	 * Test for java.net.URI parses %20 but getHost null	 *	 * See <a href="https://sourceforge.net/tracker/?func=detail&aid=927940&group_id=73833&atid=539099">[ 927940 ] java.net.URI parses %20 but getHost null</a>	 */	public final void testSpaceInHost() {		boolean expectedException = false;		try {			UURIFactory.getInstance(					"http://www.local-regions.odpm%20.gov.uk" +			"/lpsa/challenge/pdf/propect.pdf");		} catch (URIException e) {			expectedException = true;		}		assertTrue("Did not fail with escaped space.", expectedException);				expectedException = false;		try {			UURIFactory.getInstance(					"http://www.local-regions.odpm .gov.uk" +			"/lpsa/challenge/pdf/propect.pdf");		} catch (URIException e) {			expectedException = true;		}		assertTrue("Did not fail with real space.", expectedException);	}		/**	 * Test for java.net.URI chokes on hosts_with_underscores.	 *	 * @see  <a href="https://sourceforge.net/tracker/?func=detail&aid=808270&group_id=73833&atid=539099">[ 808270 ] java.net.URI chokes on hosts_with_underscores</a>	 * @throws URIException	 */	public final void testHostWithUnderscores() throws URIException {		UURI uuri = UURIFactory.getInstance(		"http://x_underscore_underscore.2u.com.tw/nonexistent_page.html");		assertEquals("Failed get of host with underscore",				"x_underscore_underscore.2u.com.tw", uuri.getHost());	}			/**	 * Two dots for igor.	 */	public final void testTwoDots() {		boolean expectedException = false;		try {			UURIFactory.getInstance(			"http://x_underscore_underscore..2u.com/nonexistent_page.html");		} catch (URIException e) {			expectedException = true;		}		assertTrue("Two dots did not throw exception", expectedException);	}		/**	 * Test for java.net.URI#getHost fails when leading digit.	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=910120&group_id=73833&atid=539099">[ 910120 ] java.net.URI#getHost fails when leading digit.</a>	 * @throws URIException	 */	public final void testHostWithDigit() throws URIException {		UURI uuri = UURIFactory.		getInstance("http://0204chat.2u.com.tw/nonexistent_page.html");		assertEquals("Failed get of host with digit",				"0204chat.2u.com.tw", uuri.getHost());	}		/**	 * Test for Constraining java URI class.	 *	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=949548&group_id=73833&atid=539099">[ 949548 ] Constraining java URI class</a>	 */	public final void testPort() {		checkBadPort("http://www.tyopaikat.com:a/robots.txt");		checkBadPort("http://158.144.21.3:80808/robots.txt");		checkBadPort("http://pdb.rutgers.edu:81.rutgers.edu/robots.txt");		checkBadPort(		    "https://webmail.gse.harvard.edu:9100robots.txt/robots.txt");		checkBadPort(		    "https://webmail.gse.harvard.edu:0/robots.txt/robots.txt");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -